You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2009/05/18 07:20:33 UTC

svn commit: r775812 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/io/MapFile.java src/test/core/org/apache/hadoop/io/TestMapFile.java

Author: sharad
Date: Mon May 18 05:20:33 2009
New Revision: 775812

URL: http://svn.apache.org/viewvc?rev=775812&view=rev
Log:
HADOOP-5369. Small tweaks to reduce MapFile index size. Contributed by Ben Maurer.

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java
    hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon May 18 05:20:33 2009
@@ -361,6 +361,9 @@
     HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS.
     (Raghu Angadi)
 
+    HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer 
+    via sharad)
+
   OPTIMIZATIONS
 
     HADOOP-5595. NameNode does not need to run a replicator to choose a

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java Mon May 18 05:20:33 2009
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.io;
 
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.io.*;
 
 import org.apache.commons.logging.Log;
@@ -74,6 +76,16 @@
     private DataOutputBuffer outBuf = new DataOutputBuffer();
     private WritableComparable lastKey;
 
+    /** What's the position (in bytes) we wrote when we got the last index */
+    private long lastIndexPos = -1;
+
+    /**
+     * What was size when we last wrote an index. Set to MIN_VALUE to ensure that
+     * we have an index at position zero -- midKey will throw an exception if this
+     * is not the case
+     */
+    private long lastIndexKeyCount = Long.MIN_VALUE;
+
 
     /** Create the named map for keys of the named class. */
     public Writer(Configuration conf, FileSystem fs, String dirName,
@@ -190,10 +202,15 @@
       throws IOException {
 
       checkKey(key);
-      
-      if (size % indexInterval == 0) {            // add an index entry
-        position.set(data.getLength());           // point to current eof
+
+      long pos = data.getLength();      
+      // Only write an index if we've changed positions. In a block compressed
+      // file, this means we write an entry at the start of each block      
+      if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
+        position.set(pos);                        // point to current eof
         index.append(key, position);
+        lastIndexPos = pos;
+        lastIndexKeyCount = size;
       }
 
       data.append(key, val);                      // append key/value to data
@@ -307,12 +324,14 @@
       if (this.keys != null)
         return;
       this.count = 0;
-      this.keys = new WritableComparable[1024];
       this.positions = new long[1024];
+
       try {
         int skip = INDEX_SKIP;
         LongWritable position = new LongWritable();
         WritableComparable lastKey = null;
+        long lastIndex = -1;
+        ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024);
         while (true) {
           WritableComparable k = comparator.newKey();
 
@@ -323,7 +342,6 @@
           if (lastKey != null && comparator.compare(lastKey, k) > 0)
             throw new IOException("key out of order: "+k+" after "+lastKey);
           lastKey = k;
-          
           if (skip > 0) {
             skip--;
             continue;                             // skip this entry
@@ -331,20 +349,23 @@
             skip = INDEX_SKIP;                    // reset skip
           }
 
-          if (count == keys.length) {                // time to grow arrays
-            int newLength = (keys.length*3)/2;
-            WritableComparable[] newKeys = new WritableComparable[newLength];
-            long[] newPositions = new long[newLength];
-            System.arraycopy(keys, 0, newKeys, 0, count);
-            System.arraycopy(positions, 0, newPositions, 0, count);
-            keys = newKeys;
-            positions = newPositions;
+	  // don't read an index that is the same as the previous one. Block
+	  // compressed map files used to do this (multiple entries would point
+	  // at the same block)
+	  if (position.get() == lastIndex)
+	    continue;
+
+          if (count == positions.length) {
+	    positions = Arrays.copyOf(positions, positions.length * 2);
           }
 
-          keys[count] = k;
+          keyBuilder.add(k);
           positions[count] = position.get();
           count++;
         }
+
+        this.keys = keyBuilder.toArray(new WritableComparable[count]);
+        positions = Arrays.copyOf(positions, count);
       } catch (EOFException e) {
         LOG.warn("Unexpected EOF reading " + index +
                               " at entry #" + count + ".  Ignoring.");
@@ -359,19 +380,17 @@
       data.seek(firstPosition);
     }
 
-    /** Get the key at approximately the middle of the file.
-     * 
-     * @throws IOException
+    /** Get the key at approximately the middle of the file. Or null if the
+     *  file is empty. 
      */
     public synchronized WritableComparable midKey() throws IOException {
 
       readIndex();
-      int pos = ((count - 1) / 2);              // middle of the index
-      if (pos < 0) {
-        throw new IOException("MapFile empty");
+      if (count == 0) {
+        return null;
       }
-      
-      return keys[pos];
+    
+      return keys[(count - 1) / 2];
     }
     
     /** Reads the final key from the file.

Modified: hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java (original)
+++ hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java Mon May 18 05:20:33 2009
@@ -87,4 +87,38 @@
     closest = (Text)reader.getClosest(key, value, true);
     assertEquals(new Text("90"), closest);
   }
+
+  public void testMidKey() throws Exception {
+    // Write a mapfile of simple data: keys are 
+    Path dirName = new Path(System.getProperty("test.build.data",".") +
+      getName() + ".mapfile"); 
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path qualifiedDirName = fs.makeQualified(dirName);
+ 
+    MapFile.Writer writer = new MapFile.Writer(conf, fs,
+      qualifiedDirName.toString(), IntWritable.class, IntWritable.class);
+    writer.append(new IntWritable(1), new IntWritable(1));
+    writer.close();
+    // Now do getClosest on created mapfile.
+    MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(),
+      conf);
+    assertEquals(new IntWritable(1), reader.midKey());
+  }
+
+
+  public void testMidKeyEmpty() throws Exception {
+    // Write a mapfile of simple data: keys are 
+    Path dirName = new Path(System.getProperty("test.build.data",".") +
+      getName() + ".mapfile"); 
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path qualifiedDirName = fs.makeQualified(dirName);
+ 
+    MapFile.Writer writer = new MapFile.Writer(conf, fs,
+      qualifiedDirName.toString(), IntWritable.class, IntWritable.class);
+    writer.close();
+    // Now do getClosest on created mapfile.
+    MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(),
+      conf);
+    assertEquals(null, reader.midKey());
+  }
 }