You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2009/03/26 01:22:19 UTC

svn commit: r758479 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/io/SequenceFile.java src/test/org/apache/hadoop/io/TestSequenceFile.java

Author: cdouglas
Date: Thu Mar 26 00:22:18 2009
New Revision: 758479

URL: http://svn.apache.org/viewvc?rev=758479&view=rev
Log:
HADOOP-5423. Include option of preserving file metadata in
SequenceFile::sort. Contributed by Michael Tamm

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java
    hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Mar 26 00:22:18 2009
@@ -184,6 +184,9 @@
     HADOOP-5491.  In contrib/index, better control memory usage.
     (Ning Li via cutting)
 
+    HADOOP-5423. Include option of preserving file metadata in
+    SequenceFile::sort. (Michael Tamm via cdouglas)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java Thu Mar 26 00:22:18 2009
@@ -2233,6 +2233,7 @@
     private Class valClass;
 
     private Configuration conf;
+    private Metadata metadata;
     
     private Progressable progressable = null;
 
@@ -2245,6 +2246,12 @@
     /** Sort and merge using an arbitrary {@link RawComparator}. */
     public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, 
                   Class valClass, Configuration conf) {
+      this(fs, comparator, keyClass, valClass, conf, new Metadata());
+    }
+
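+    /** Sort and merge using an arbitrary {@link RawComparator}; {@code metadata} is given to the writer of the finished output (temp files get an empty {@link Metadata}). */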
+    public Sorter(FileSystem fs, RawComparator comparator, Class keyClass,
+                  Class valClass, Configuration conf, Metadata metadata) {
       this.fs = fs;
       this.comparator = comparator;
       this.keyClass = keyClass;
@@ -2252,6 +2259,7 @@
       this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024;
       this.factor = conf.getInt("io.sort.factor", 100);
       this.conf = conf;
+      this.metadata = metadata;
     }
 
     /** Set the number of streams to merge at once.*/
@@ -2495,7 +2503,7 @@
         long segmentStart = out.getPos();
         Writer writer = createWriter(conf, out, keyClass, valClass, 
                                      isCompressed, isBlockCompressed, codec, 
-                                     new Metadata());
+                                     done ? metadata : new Metadata());
         
         if (!done) {
           writer.sync = null;                     // disable sync on temp files

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java Thu Mar 26 00:22:18 2009
@@ -307,10 +307,10 @@
   public void testSequenceFileMetadata() throws Exception {
     LOG.info("Testing SequenceFile with metadata");
     int count = 1024 * 10;
-    int megabytes = 1;
-    int factor = 5;
     CompressionCodec codec = new DefaultCodec();
     Path file = new Path(System.getProperty("test.build.data",".")+"/test.seq.metadata");
+    Path sortedFile =
+      new Path(System.getProperty("test.build.data",".")+"/test.sorted.seq.metadata");
     Path recordCompressedFile = 
       new Path(System.getProperty("test.build.data",".")+"/test.rc.seq.metadata");
     Path blockCompressedFile = 
@@ -352,6 +352,14 @@
         LOG.info("The retrieved metadata:\n" + aMetadata.toString());
         throw new RuntimeException("metadata not match:  " + 3);
       }
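+      // SequenceFile.Sorter -- NOTE(review): the read below uses recordCompressedFile; it presumably should read sortedFile to check the sorter's output -- confirm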
+      sortMetadataTest(fs, file, sortedFile, theMetadata);
+      aMetadata = readMetadata(fs, recordCompressedFile);
+      if (!theMetadata.equals(aMetadata)) {
+        LOG.info("The original metadata:\n" + theMetadata.toString());
+        LOG.info("The retrieved metadata:\n" + aMetadata.toString());
+        throw new RuntimeException("metadata not match:  " + 4);
+      }
     } finally {
       fs.close();
     }
@@ -361,7 +369,7 @@
   
   private static SequenceFile.Metadata readMetadata(FileSystem fs, Path file)
     throws IOException {
-    LOG.info("reading file: " + file.toString() + "\n");
+    LOG.info("reading file: " + file.toString());
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
     SequenceFile.Metadata meta = reader.getMetadata(); 
     reader.close();
@@ -372,7 +380,7 @@
                                         CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
     throws IOException {
     fs.delete(file, true);
-    LOG.info("creating " + count + " records with metadata and with" + compressionType +
+    LOG.info("creating " + count + " records with metadata and with " + compressionType +
              " compression");
     SequenceFile.Writer writer = 
       SequenceFile.createWriter(fs, conf, file, 
@@ -388,6 +396,15 @@
     writer.close();
   }
 
+  private static void sortMetadataTest(FileSystem fs, Path unsortedFile, Path sortedFile, SequenceFile.Metadata metadata)
+    throws IOException {  // sorts unsortedFile into sortedFile with a Sorter built from the given metadata
+    fs.delete(sortedFile, true);  // remove whatever is already at the output path
+    LOG.info("sorting: " + unsortedFile + " to: " + sortedFile);
+    final WritableComparator comparator = WritableComparator.get(RandomDatum.class);  // comparator looked up for RandomDatum
+    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, comparator, RandomDatum.class, RandomDatum.class, conf, metadata);
+    sorter.sort(new Path[] { unsortedFile }, sortedFile, false);  // NOTE(review): false presumably means "do not delete the input" -- confirm against Sorter.sort
+  }
+
   public void testClose() throws IOException {
     Configuration conf = new Configuration();
     LocalFileSystem fs = new LocalFileSystem();