You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2009/03/26 01:22:19 UTC
svn commit: r758479 - in /hadoop/core/trunk: CHANGES.txt
src/core/org/apache/hadoop/io/SequenceFile.java
src/test/org/apache/hadoop/io/TestSequenceFile.java
Author: cdouglas
Date: Thu Mar 26 00:22:18 2009
New Revision: 758479
URL: http://svn.apache.org/viewvc?rev=758479&view=rev
Log:
HADOOP-5423. Include option of preserving file metadata in
SequenceFile::sort. Contributed by Michael Tamm
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java
hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Mar 26 00:22:18 2009
@@ -184,6 +184,9 @@
HADOOP-5491. In contrib/index, better control memory usage.
(Ning Li via cutting)
+ HADOOP-5423. Include option of preserving file metadata in
+ SequenceFile::sort. (Michael Tamm via cdouglas)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java Thu Mar 26 00:22:18 2009
@@ -2233,6 +2233,7 @@
private Class valClass;
private Configuration conf;
+ private Metadata metadata;
private Progressable progressable = null;
@@ -2245,6 +2246,12 @@
/** Sort and merge using an arbitrary {@link RawComparator}. */
public Sorter(FileSystem fs, RawComparator comparator, Class keyClass,
Class valClass, Configuration conf) {
+ this(fs, comparator, keyClass, valClass, conf, new Metadata());
+ }
+
+ /** Sort and merge using an arbitrary {@link RawComparator}, storing the given {@link Metadata} in the sorted output. */
+ public Sorter(FileSystem fs, RawComparator comparator, Class keyClass,
+ Class valClass, Configuration conf, Metadata metadata) {
this.fs = fs;
this.comparator = comparator;
this.keyClass = keyClass;
@@ -2252,6 +2259,7 @@
this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024;
this.factor = conf.getInt("io.sort.factor", 100);
this.conf = conf;
+ this.metadata = metadata;
}
/** Set the number of streams to merge at once.*/
@@ -2495,7 +2503,7 @@
long segmentStart = out.getPos();
Writer writer = createWriter(conf, out, keyClass, valClass,
isCompressed, isBlockCompressed, codec,
- new Metadata());
+ done ? metadata : new Metadata());
if (!done) {
writer.sync = null; // disable sync on temp files
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java Thu Mar 26 00:22:18 2009
@@ -307,10 +307,10 @@
public void testSequenceFileMetadata() throws Exception {
LOG.info("Testing SequenceFile with metadata");
int count = 1024 * 10;
- int megabytes = 1;
- int factor = 5;
CompressionCodec codec = new DefaultCodec();
Path file = new Path(System.getProperty("test.build.data",".")+"/test.seq.metadata");
+ Path sortedFile =
+ new Path(System.getProperty("test.build.data",".")+"/test.sorted.seq.metadata");
Path recordCompressedFile =
new Path(System.getProperty("test.build.data",".")+"/test.rc.seq.metadata");
Path blockCompressedFile =
@@ -352,6 +352,14 @@
LOG.info("The retrieved metadata:\n" + aMetadata.toString());
throw new RuntimeException("metadata not match: " + 3);
}
+ // SequenceFile.Sorter
+ sortMetadataTest(fs, file, sortedFile, theMetadata);
+ aMetadata = readMetadata(fs, sortedFile);
+ if (!theMetadata.equals(aMetadata)) {
+ LOG.info("The original metadata:\n" + theMetadata.toString());
+ LOG.info("The retrieved metadata:\n" + aMetadata.toString());
+ throw new RuntimeException("metadata not match: " + 4);
+ }
} finally {
fs.close();
}
@@ -361,7 +369,7 @@
private static SequenceFile.Metadata readMetadata(FileSystem fs, Path file)
throws IOException {
- LOG.info("reading file: " + file.toString() + "\n");
+ LOG.info("reading file: " + file.toString());
SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
SequenceFile.Metadata meta = reader.getMetadata();
reader.close();
@@ -372,7 +380,7 @@
CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
throws IOException {
fs.delete(file, true);
- LOG.info("creating " + count + " records with metadata and with" + compressionType +
+ LOG.info("creating " + count + " records with metadata and with " + compressionType +
" compression");
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, conf, file,
@@ -388,6 +396,15 @@
writer.close();
}
+ private static void sortMetadataTest(FileSystem fs, Path unsortedFile, Path sortedFile, SequenceFile.Metadata metadata)
+ throws IOException {
+ fs.delete(sortedFile, true);
+ LOG.info("sorting: " + unsortedFile + " to: " + sortedFile);
+ final WritableComparator comparator = WritableComparator.get(RandomDatum.class);
+ SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, comparator, RandomDatum.class, RandomDatum.class, conf, metadata);
+ sorter.sort(new Path[] { unsortedFile }, sortedFile, false);
+ }
+
public void testClose() throws IOException {
Configuration conf = new Configuration();
LocalFileSystem fs = new LocalFileSystem();