You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/08/23 21:18:15 UTC

svn commit: r988260 - in /avro/trunk: CHANGES.txt lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java

Author: cutting
Date: Mon Aug 23 19:18:15 2010
New Revision: 988260

URL: http://svn.apache.org/viewvc?rev=988260&view=rev
Log:
AVRO-534. Java: Permit mapred jobs to specify a different input schema from the input file.  Contributed by Harsh J Chouraria.

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java
    avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=988260&r1=988259&r2=988260&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Aug 23 19:18:15 2010
@@ -131,6 +131,9 @@ Avro 1.4.0 (unreleased)
     AVRO-615. Java: Improve error message for NullPointerException
     when writing data.  (cutting)
 
+    AVRO-534. Java: Permit mapred jobs to specify a different input
+    schema from the input file.  (Harsh J Chouraria via cutting)
+
   BUG FIXES
 
     AVRO-618. Avro doesn't work with python 2.4 (philz)

Modified: avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java?rev=988260&r1=988259&r2=988260&view=diff
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java (original)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestWordCount.java Mon Aug 23 19:18:15 2010
@@ -21,7 +21,13 @@ package org.apache.avro.mapred;
 import java.io.IOException;
 import java.util.StringTokenizer;
 
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -87,5 +93,52 @@ public class TestWordCount {
     
     WordCountUtil.validateCountsFile();
   }
+  
+  /**
+   * Reads out/part-00000 with a reader schema that declares a defaulted "rank"
+   * field, then checks every record's rank, the record count and the value sum.
+   */
+  @Test
+  @SuppressWarnings("deprecation")
+  public void testProjection() throws Exception {
+    JobConf job = new JobConf();
+    Integer defaultRank = Integer.valueOf(-1);
+    String jsonSchema =
+      "{\"type\":\"record\"," +
+      "\"name\":\"org.apache.avro.mapred.Pair\","+
+      "\"fields\": [ " +
+        "{\"name\":\"rank\", \"type\":\"int\", \"default\": -1}," +
+        "{\"name\":\"value\", \"type\":\"long\"}" +
+      "]}";
+    Schema readerSchema = Schema.parse(jsonSchema);
+    AvroJob.setInputSchema(job, readerSchema);
+
+    String dir = System.getProperty("test.dir", ".") + "/mapred";
+    Path inputPath = new Path(dir + "/out" + "/part-00000" + AvroOutputFormat.EXT);
+    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
+    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);
+    AvroRecordReader<Pair<Integer, Long>> recordReader = new AvroRecordReader<Pair<Integer, Long>>(job, fileSplit);
+    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);
+    NullWritable ignore = NullWritable.get();
+
+    long sumOfCounts = 0;
+    long numOfCounts = 0;
+    try {
+      while (recordReader.next(inputPair, ignore)) {
+        // JUnit takes (expected, actual); position 0 is "rank", position 1 is "value".
+        Assert.assertEquals(defaultRank, (Integer) inputPair.datum().get(0));
+        sumOfCounts += (Long) inputPair.datum().get(1);
+        numOfCounts++;
+      }
+    } finally {
+      recordReader.close(); // release the input file even if an assertion fails
+    }
+    Assert.assertEquals(WordCountUtil.COUNTS.size(), numOfCounts);
+    long expectedSumOfCounts = 0;
+    for (Long count : WordCountUtil.COUNTS.values()) {
+      expectedSumOfCounts += count;
+    }
+    Assert.assertEquals(expectedSumOfCounts, sumOfCounts);
+  }
 
 }

Modified: avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java?rev=988260&r1=988259&r2=988260&view=diff
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java (original)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/WordCountUtil.java Mon Aug 23 19:18:15 2010
@@ -60,7 +60,7 @@ class WordCountUtil {
     "the rain in spain falls mainly on the plains"
   };
 
-  private static final Map<String,Long> COUNTS =
+  public static final Map<String,Long> COUNTS =
     new TreeMap<String,Long>();
   static {
     for (String line : LINES) {