You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2009/05/18 07:20:33 UTC
svn commit: r775812 - in /hadoop/core/trunk: CHANGES.txt
src/core/org/apache/hadoop/io/MapFile.java
src/test/core/org/apache/hadoop/io/TestMapFile.java
Author: sharad
Date: Mon May 18 05:20:33 2009
New Revision: 775812
URL: http://svn.apache.org/viewvc?rev=775812&view=rev
Log:
HADOOP-5369. Small tweaks to reduce MapFile index size. Contributed by Ben Maurer.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java
hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon May 18 05:20:33 2009
@@ -361,6 +361,9 @@
HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS.
(Raghu Angadi)
+ HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer
+ via sharad)
+
OPTIMIZATIONS
HADOOP-5595. NameNode does not need to run a replicator to choose a
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/io/MapFile.java Mon May 18 05:20:33 2009
@@ -18,6 +18,8 @@
package org.apache.hadoop.io;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.io.*;
import org.apache.commons.logging.Log;
@@ -74,6 +76,16 @@
private DataOutputBuffer outBuf = new DataOutputBuffer();
private WritableComparable lastKey;
+ /** What's the position (in bytes) we wrote when we got the last index */
+ private long lastIndexPos = -1;
+
+ /**
+ * What was size when we last wrote an index. Set to MIN_VALUE to ensure that
+ * we have an index at position zero -- midKey will throw an exception if this
+ * is not the case
+ */
+ private long lastIndexKeyCount = Long.MIN_VALUE;
+
/** Create the named map for keys of the named class. */
public Writer(Configuration conf, FileSystem fs, String dirName,
@@ -190,10 +202,15 @@
throws IOException {
checkKey(key);
-
- if (size % indexInterval == 0) { // add an index entry
- position.set(data.getLength()); // point to current eof
+
+ long pos = data.getLength();
+ // Only write an index if we've changed positions. In a block compressed
+ // file, this means we write an entry at the start of each block
+ if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
+ position.set(pos); // point to current eof
index.append(key, position);
+ lastIndexPos = pos;
+ lastIndexKeyCount = size;
}
data.append(key, val); // append key/value to data
@@ -307,12 +324,14 @@
if (this.keys != null)
return;
this.count = 0;
- this.keys = new WritableComparable[1024];
this.positions = new long[1024];
+
try {
int skip = INDEX_SKIP;
LongWritable position = new LongWritable();
WritableComparable lastKey = null;
+ long lastIndex = -1;
+ ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024);
while (true) {
WritableComparable k = comparator.newKey();
@@ -323,7 +342,6 @@
if (lastKey != null && comparator.compare(lastKey, k) > 0)
throw new IOException("key out of order: "+k+" after "+lastKey);
lastKey = k;
-
if (skip > 0) {
skip--;
continue; // skip this entry
@@ -331,20 +349,23 @@
skip = INDEX_SKIP; // reset skip
}
- if (count == keys.length) { // time to grow arrays
- int newLength = (keys.length*3)/2;
- WritableComparable[] newKeys = new WritableComparable[newLength];
- long[] newPositions = new long[newLength];
- System.arraycopy(keys, 0, newKeys, 0, count);
- System.arraycopy(positions, 0, newPositions, 0, count);
- keys = newKeys;
- positions = newPositions;
+ // don't read an index that is the same as the previous one. Block
+ // compressed map files used to do this (multiple entries would point
+ // at the same block)
+ if (position.get() == lastIndex)
+ continue;
+
+ if (count == positions.length) {
+ positions = Arrays.copyOf(positions, positions.length * 2);
}
- keys[count] = k;
+ keyBuilder.add(k);
positions[count] = position.get();
count++;
}
+
+ this.keys = keyBuilder.toArray(new WritableComparable[count]);
+ positions = Arrays.copyOf(positions, count);
} catch (EOFException e) {
LOG.warn("Unexpected EOF reading " + index +
" at entry #" + count + ". Ignoring.");
@@ -359,19 +380,17 @@
data.seek(firstPosition);
}
- /** Get the key at approximately the middle of the file.
- *
- * @throws IOException
+ /** Get the key at approximately the middle of the file. Or null if the
+ * file is empty.
*/
public synchronized WritableComparable midKey() throws IOException {
readIndex();
- int pos = ((count - 1) / 2); // middle of the index
- if (pos < 0) {
- throw new IOException("MapFile empty");
+ if (count == 0) {
+ return null;
}
-
- return keys[pos];
+
+ return keys[(count - 1) / 2];
}
/** Reads the final key from the file.
Modified: hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java?rev=775812&r1=775811&r2=775812&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java (original)
+++ hadoop/core/trunk/src/test/core/org/apache/hadoop/io/TestMapFile.java Mon May 18 05:20:33 2009
@@ -87,4 +87,38 @@
closest = (Text)reader.getClosest(key, value, true);
assertEquals(new Text("90"), closest);
}
+
+ public void testMidKey() throws Exception {
+ // Write a mapfile of simple data: keys are
+ Path dirName = new Path(System.getProperty("test.build.data",".") +
+ getName() + ".mapfile");
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path qualifiedDirName = fs.makeQualified(dirName);
+
+ MapFile.Writer writer = new MapFile.Writer(conf, fs,
+ qualifiedDirName.toString(), IntWritable.class, IntWritable.class);
+ writer.append(new IntWritable(1), new IntWritable(1));
+ writer.close();
+ // Now do getClosest on created mapfile.
+ MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(),
+ conf);
+ assertEquals(new IntWritable(1), reader.midKey());
+ }
+
+
+ public void testMidKeyEmpty() throws Exception {
+ // Write a mapfile of simple data: keys are
+ Path dirName = new Path(System.getProperty("test.build.data",".") +
+ getName() + ".mapfile");
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path qualifiedDirName = fs.makeQualified(dirName);
+
+ MapFile.Writer writer = new MapFile.Writer(conf, fs,
+ qualifiedDirName.toString(), IntWritable.class, IntWritable.class);
+ writer.close();
+ // Now do getClosest on created mapfile.
+ MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(),
+ conf);
+ assertEquals(null, reader.midKey());
+ }
}