You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/08/26 19:33:14 UTC
svn commit: r689148 - in /hadoop/core/trunk: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
Author: dhruba
Date: Tue Aug 26 10:33:14 2008
New Revision: 689148
URL: http://svn.apache.org/viewvc?rev=689148&view=rev
Log:
HADOOP-2330. Preallocate HDFS transaction log to improve performance.
(dhruba and hairong)
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=689148&r1=689147&r2=689148&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue Aug 26 10:33:14 2008
@@ -221,6 +221,9 @@
HADOOP-3944. Improve documentation for public TupleWritable class in
join package. (Chris Douglas via enis)
+ HADOOP-2330. Preallocate HDFS transaction log to improve performance.
+ (dhruba and hairong)
+
OPTIMIZATIONS
HADOOP-3556. Removed lock contention in MD5Hash by changing the
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=689148&r1=689147&r2=689148&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Tue Aug 26 10:33:14 2008
@@ -26,9 +26,11 @@
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.lang.Math;
import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
@@ -44,6 +46,7 @@
*
*/
public class FSEditLog {
+ private static final byte OP_INVALID = -1;
private static final byte OP_ADD = 0;
private static final byte OP_RENAME = 1; // rename
private static final byte OP_DELETE = 2; // delete
@@ -105,14 +108,17 @@
private FileChannel fc; // channel of the file stream for sync
private DataOutputBuffer bufCurrent; // current buffer for writing
private DataOutputBuffer bufReady; // buffer ready for flushing
+ static ByteBuffer fill = ByteBuffer.allocateDirect(512); // preallocation
EditLogFileOutputStream(File name) throws IOException {
super();
file = name;
bufCurrent = new DataOutputBuffer(sizeFlushBuffer);
bufReady = new DataOutputBuffer(sizeFlushBuffer);
- fp = new FileOutputStream(name, true); // open for append
- fc = fp.getChannel();
+ RandomAccessFile rp = new RandomAccessFile(name, "rw");
+ fp = new FileOutputStream(rp.getFD()); // open for append
+ fc = rp.getChannel();
+ fc.position(fc.size());
}
@Override
@@ -141,6 +147,7 @@
@Override
void create() throws IOException {
fc.truncate(0);
+ fc.position(0);
bufCurrent.writeInt(FSConstants.LAYOUT_VERSION);
setReadyToFlush();
flush();
@@ -158,7 +165,11 @@
}
bufCurrent.close();
bufReady.close();
+
+ // remove the last INVALID marker from transaction log.
+ fc.truncate(fc.position());
fp.close();
+
bufCurrent = bufReady = null;
}
@@ -167,8 +178,9 @@
* New data can be still written to the stream while flushing is performed.
*/
@Override
- void setReadyToFlush() {
+ void setReadyToFlush() throws IOException {
assert bufReady.size() == 0 : "previous data is not flushed yet";
+ write(OP_INVALID); // insert end-of-file marker
DataOutputBuffer tmp = bufReady;
bufReady = bufCurrent;
bufCurrent = tmp;
@@ -181,9 +193,11 @@
*/
@Override
protected void flushAndSync() throws IOException {
+ preallocate(); // preallocate file if necessary
bufReady.writeTo(fp); // write data to file
bufReady.reset(); // erase all data in the buffer
- fc.force(true); // sync to persistent store
+ fc.force(false); // metadata updates not needed because of preallocation
+ fc.position(fc.position()-1); // skip back the end-of-file marker
}
/**
@@ -202,6 +216,21 @@
long lastModified() {
return file.lastModified();
}
+
+ // allocate a big chunk of data
+ private void preallocate() throws IOException {
+ long position = fc.position();
+ if (position + 4096 >= fc.size()) {
+ FSNamesystem.LOG.info("XXX Preallocating Edit log, current size " +
+ fc.size());
+ long newsize = position + 1024*1024; // 1MB
+ fill.position(0);
+ int written = fc.write(fill, newsize);
+ FSNamesystem.LOG.info("XXX Edit log size is now " + fc.size() +
+ " written " + written + " bytes " +
+ " at offset " + newsize);
+ }
+ }
}
static class EditLogFileInputStream extends EditLogInputStream {
@@ -454,6 +483,11 @@
byte opcode = -1;
try {
opcode = in.readByte();
+ if (opcode == OP_INVALID) {
+ FSNamesystem.LOG.info("Invalid opcode, reached end of edit log " +
+ "Number of transactions found " + numEdits);
+ break; // no more transactions
+ }
} catch (EOFException e) {
break; // no more transactions
}