You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/09/10 20:51:15 UTC
svn commit: r1383039 - in
/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs: ./
src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/
src/main/java/org/apache/hadoop/hdfs/qjournal/client/
src/main/java/org/apache/hado...
Author: todd
Date: Mon Sep 10 18:51:15 2012
New Revision: 1383039
URL: http://svn.apache.org/viewvc?rev=1383039&view=rev
Log:
HDFS-3885. QJM: optimize log sync when JN is lagging behind. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/BinaryEditsVisitor.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt Mon Sep 10 18:51:15 2012
@@ -54,3 +54,5 @@ HDFS-3893. QJM: Make QJM work with secur
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
+
+HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java Mon Sep 10 18:51:15 2012
@@ -84,7 +84,7 @@ class BookKeeperEditLogOutputStream
@Override
public void close() throws IOException {
setReadyToFlush();
- flushAndSync();
+ flushAndSync(true);
try {
lh.close();
} catch (InterruptedException ie) {
@@ -130,7 +130,7 @@ class BookKeeperEditLogOutputStream
}
@Override
- public void flushAndSync() throws IOException {
+ public void flushAndSync(boolean durable) throws IOException {
assert(syncLatch != null);
try {
syncLatch.await();
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumOutputStream.java Mon Sep 10 18:51:15 2012
@@ -77,7 +77,7 @@ class QuorumOutputStream extends EditLog
}
@Override
- protected void flushAndSync() throws IOException {
+ protected void flushAndSync(boolean durable) throws IOException {
int numReadyBytes = buf.countReadyBytes();
if (numReadyBytes > 0) {
int numReadyTxns = buf.countReadyTxns();
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Mon Sep 10 18:51:15 2012
@@ -301,17 +301,23 @@ class Journal implements Closeable {
if (LOG.isTraceEnabled()) {
LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
}
+
+ // If the edit has already been marked as committed, we know
+ // it has been fsynced on a quorum of other nodes, and we are
+ // "catching up" with the rest. Hence we do not need to fsync.
+ boolean isLagging = lastTxnId <= committedTxnId.get();
+ boolean shouldFsync = !isLagging;
curSegment.writeRaw(records, 0, records.length);
curSegment.setReadyToFlush();
Stopwatch sw = new Stopwatch();
sw.start();
- curSegment.flush();
+ curSegment.flush(shouldFsync);
sw.stop();
metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS));
-
- if (committedTxnId.get() > lastTxnId) {
+
+ if (isLagging) {
// This batch of edits has already been committed on a quorum of other
// nodes. So, we are in "catch up" mode. This gets its own metric.
metrics.batchesWrittenWhileLagging.incr(1);
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java Mon Sep 10 18:51:15 2012
@@ -114,7 +114,7 @@ class EditLogBackupOutputStream extends
}
@Override // EditLogOutputStream
- protected void flushAndSync() throws IOException {
+ protected void flushAndSync(boolean durable) throws IOException {
assert out.getLength() == 0 : "Output buffer is not empty";
int numReadyTxns = doubleBuf.countReadyTxns();
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java Mon Sep 10 18:51:15 2012
@@ -176,7 +176,7 @@ public class EditLogFileOutputStream ext
* accumulates new log records while readyBuffer will be flushed and synced.
*/
@Override
- public void flushAndSync() throws IOException {
+ public void flushAndSync(boolean durable) throws IOException {
if (fp == null) {
throw new IOException("Trying to use aborted output stream");
}
@@ -186,7 +186,7 @@ public class EditLogFileOutputStream ext
}
preallocate(); // preallocate file if necessay
doubleBuf.flushTo(fp);
- if (!shouldSkipFsyncForTests) {
+ if (durable && !shouldSkipFsyncForTests) {
fc.force(false); // metadata updates not needed
}
}
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java Mon Sep 10 18:51:15 2012
@@ -93,18 +93,24 @@ public abstract class EditLogOutputStrea
/**
* Flush and sync all data that is ready to be flush
* {@link #setReadyToFlush()} into underlying persistent store.
+ * @param durable if true, the edits should be made truly durable before
+ * returning
* @throws IOException
*/
- abstract protected void flushAndSync() throws IOException;
+ abstract protected void flushAndSync(boolean durable) throws IOException;
/**
* Flush data to persistent store.
* Collect sync metrics.
*/
public void flush() throws IOException {
+ flush(true);
+ }
+
+ public void flush(boolean durable) throws IOException {
numSync++;
long start = now();
- flushAndSync();
+ flushAndSync(durable);
long end = now();
totalTimeSync += (end - start);
}
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java Mon Sep 10 18:51:15 2012
@@ -471,12 +471,12 @@ public class JournalSet implements Journ
}
@Override
- protected void flushAndSync() throws IOException {
+ protected void flushAndSync(final boolean durable) throws IOException {
mapJournalsAndReportErrors(new JournalClosure() {
@Override
public void apply(JournalAndStream jas) throws IOException {
if (jas.isActive()) {
- jas.getCurrentStream().flushAndSync();
+ jas.getCurrentStream().flushAndSync(durable);
}
}
}, "flushAndSync");
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/BinaryEditsVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/BinaryEditsVisitor.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/BinaryEditsVisitor.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/BinaryEditsVisitor.java Mon Sep 10 18:51:15 2012
@@ -56,7 +56,7 @@ public class BinaryEditsVisitor implemen
@Override
public void close(Throwable error) throws IOException {
elfos.setReadyToFlush();
- elfos.flushAndSync();
+ elfos.flushAndSync(true);
elfos.close();
}
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Mon Sep 10 18:51:15 2012
@@ -1222,7 +1222,7 @@ public class TestEditLog {
elfos.create();
elfos.writeRaw(garbage, 0, garbage.length);
elfos.setReadyToFlush();
- elfos.flushAndSync();
+ elfos.flushAndSync(true);
elfos.close();
elfos = null;
file = new File(TEST_LOG_NAME);
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java Mon Sep 10 18:51:15 2012
@@ -55,7 +55,7 @@ public class TestEditLogFileOutputStream
static void flushAndCheckLength(EditLogFileOutputStream elos,
long expectedLength) throws IOException {
elos.setReadyToFlush();
- elos.flushAndSync();
+ elos.flushAndSync(true);
assertEquals(expectedLength, elos.getFile().length());
}
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java?rev=1383039&r1=1383038&r2=1383039&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java Mon Sep 10 18:51:15 2012
@@ -74,7 +74,7 @@ public class TestNameNodeRecovery {
elts.addTransactionsToLog(elfos, cache);
elfos.setReadyToFlush();
- elfos.flushAndSync();
+ elfos.flushAndSync(true);
elfos.close();
elfos = null;
file = new File(TEST_LOG_NAME);