Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/08/26 19:33:14 UTC

svn commit: r689148 - in /hadoop/core/trunk: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

Author: dhruba
Date: Tue Aug 26 10:33:14 2008
New Revision: 689148

URL: http://svn.apache.org/viewvc?rev=689148&view=rev
Log:
HADOOP-2330. Preallocate HDFS transaction log to improve performance.
(dhruba and hairong)

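In outline, the change grows the edit log file in 1MB chunks ahead of the write position, so that each sync can use fc.force(false) and skip the file-length metadata update. Below is a minimal, self-contained sketch of that preallocation pattern only; the class name, file name and main() driver are illustrative and not part of the commit, while the 512-byte fill buffer, 4KB low-water mark and 1MB chunk size mirror the diff that follows.

    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;

    // Sketch of chunked file preallocation; names are illustrative.
    public class PreallocationSketch {
      // zero-filled direct buffer used only to extend the file
      private static final ByteBuffer FILL = ByteBuffer.allocateDirect(512);

      // If less than 4KB of preallocated space remains, extend the file by
      // writing the fill buffer 1MB past the current write position.
      static void preallocate(FileChannel fc) throws IOException {
        long position = fc.position();
        if (position + 4096 >= fc.size()) {
          long newsize = position + 1024 * 1024;
          FILL.position(0);
          fc.write(FILL, newsize);  // extends the file length; channel position is unchanged
        }
      }

      public static void main(String[] args) throws IOException {
        RandomAccessFile raf = new RandomAccessFile("edits.sketch", "rw");
        FileChannel fc = raf.getChannel();
        fc.position(fc.size());     // start appending at the current end of file
        preallocate(fc);
        // ... write and buffer log records here ...
        fc.force(false);            // data only; the length was already extended above
        raf.close();
      }
    }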

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=689148&r1=689147&r2=689148&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue Aug 26 10:33:14 2008
@@ -221,6 +221,9 @@
     HADOOP-3944. Improve documentation for public TupleWritable class in 
     join package. (Chris Douglas via enis)
 
+    HADOOP-2330. Preallocate HDFS transaction log to improve performance.
+    (dhruba and hairong)
+
   OPTIMIZATIONS
 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=689148&r1=689147&r2=689148&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Tue Aug 26 10:33:14 2008
@@ -26,9 +26,11 @@
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.lang.Math;
 import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
@@ -44,6 +46,7 @@
  * 
  */
 public class FSEditLog {
+  private static final byte OP_INVALID = -1;
   private static final byte OP_ADD = 0;
   private static final byte OP_RENAME = 1;  // rename
   private static final byte OP_DELETE = 2;  // delete
@@ -105,14 +108,17 @@
     private FileChannel fc;         // channel of the file stream for sync
     private DataOutputBuffer bufCurrent;  // current buffer for writing
     private DataOutputBuffer bufReady;    // buffer ready for flushing
+    static ByteBuffer fill = ByteBuffer.allocateDirect(512); // preallocation
 
     EditLogFileOutputStream(File name) throws IOException {
       super();
       file = name;
       bufCurrent = new DataOutputBuffer(sizeFlushBuffer);
       bufReady = new DataOutputBuffer(sizeFlushBuffer);
-      fp = new FileOutputStream(name, true); // open for append
-      fc = fp.getChannel();
+      RandomAccessFile rp = new RandomAccessFile(name, "rw");
+      fp = new FileOutputStream(rp.getFD()); // open for append
+      fc = rp.getChannel();
+      fc.position(fc.size());
     }
 
     @Override
@@ -141,6 +147,7 @@
     @Override
     void create() throws IOException {
       fc.truncate(0);
+      fc.position(0);
       bufCurrent.writeInt(FSConstants.LAYOUT_VERSION);
       setReadyToFlush();
       flush();
@@ -158,7 +165,11 @@
       } 
       bufCurrent.close();
       bufReady.close();
+
+      // remove the last INVALID marker from transaction log.
+      fc.truncate(fc.position());
       fp.close();
+      
       bufCurrent = bufReady = null;
     }
 
@@ -167,8 +178,9 @@
      * New data can be still written to the stream while flushing is performed.
      */
     @Override
-    void setReadyToFlush() {
+    void setReadyToFlush() throws IOException {
       assert bufReady.size() == 0 : "previous data is not flushed yet";
+      write(OP_INVALID);           // insert end-of-file marker
       DataOutputBuffer tmp = bufReady;
       bufReady = bufCurrent;
       bufCurrent = tmp;
@@ -181,9 +193,11 @@
      */
     @Override
     protected void flushAndSync() throws IOException {
+      preallocate();            // preallocate file if necessary
       bufReady.writeTo(fp);     // write data to file
       bufReady.reset();         // erase all data in the buffer
-      fc.force(true);           // sync to persistent store
+      fc.force(false);          // metadata updates not needed because of preallocation
+      fc.position(fc.position()-1); // skip back the end-of-file marker
     }
 
     /**
@@ -202,6 +216,21 @@
     long lastModified() {
       return file.lastModified();
     }
+
+    // allocate a big chunk of data
+    private void preallocate() throws IOException {
+      long position = fc.position();
+      if (position + 4096 >= fc.size()) {
+        FSNamesystem.LOG.info("XXX Preallocating Edit log, current size " +
+                              fc.size());
+        long newsize = position + 1024*1024; // 1MB
+        fill.position(0);
+        int written = fc.write(fill, newsize);
+        FSNamesystem.LOG.info("XXX Edit log size is now " + fc.size() +
+                              " written " + written + " bytes " +
+                              " at offset " +  newsize);
+      }
+    }
   }
 
   static class EditLogFileInputStream extends EditLogInputStream {
@@ -454,6 +483,11 @@
         byte opcode = -1;
         try {
           opcode = in.readByte();
+          if (opcode == OP_INVALID) {
+            FSNamesystem.LOG.info("Invalid opcode, reached end of edit log " +
+                                   "Number of transactions found " + numEdits);
+            break; // no more transactions
+          }
         } catch (EOFException e) {
           break; // no more transactions
         }