You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by jo...@apache.org on 2009/07/15 18:42:07 UTC

svn commit: r794324 - in /hadoop/mapreduce/trunk: ./ src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/ src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/ src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/

Author: johan
Date: Wed Jul 15 16:42:06 2009
New Revision: 794324

URL: http://svn.apache.org/viewvc?rev=794324&view=rev
Log:
MAPREDUCE-680. Fix so MRUnit can handle reuse of Writable objects. (Aaron Kimball via johan)

Added:
    hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/TestMockOutputCollector.java
Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/MockOutputCollector.java
    hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/AllTests.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=794324&r1=794323&r2=794324&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Jul 15 16:42:06 2009
@@ -199,3 +199,6 @@
 
     MAPREDUCE-565. Fix partitioner to work with new API. (Owen O'Malley via
     cdouglas)
+    
+    MAPREDUCE-680. Fix so MRUnit can handle reuse of Writable objects.
+    (Aaron Kimball via johan)

Modified: hadoop/mapreduce/trunk/src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/MockOutputCollector.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/MockOutputCollector.java?rev=794324&r1=794323&r2=794324&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/MockOutputCollector.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/mrunit/src/java/org/apache/hadoop/mrunit/mock/MockOutputCollector.java Wed Jul 15 16:42:06 2009
@@ -22,38 +22,92 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Serializer;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.hadoop.util.ReflectionUtils;
+
 
 /**
  * OutputCollector to use in the test framework for Mapper and Reducer
  * classes. Accepts a set of output (k, v) pairs and returns them to the
  * framework for validation.
- *
- * BUG: Currently, this does not make deep copies of values passed to collect().
- * So emitting the same Text object (for instance) repeatedly, with different
- * internal string data each time, is not tested in the same way that Hadoop's
- * OutputCollector works.
- *
  */
 public class MockOutputCollector<K, V> implements OutputCollector<K, V> {
 
   private ArrayList<Pair<K, V>> collectedOutputs;
+  private SerializationFactory serializationFactory;
+  private DataOutputBuffer outBuffer;
+  private DataInputBuffer inBuffer;
+  private Configuration conf;
+
 
   public MockOutputCollector() {
     collectedOutputs = new ArrayList<Pair<K, V>>();
+    // Scratch buffers reused by deepCopy() for every collected key and value.
+    outBuffer = new DataOutputBuffer();
+    inBuffer = new DataInputBuffer();
+    // Configuration handed to the SerializationFactory and to getInstance().
+    conf = new Configuration();
+    serializationFactory = new SerializationFactory(conf);
+  }
+
+  /** Creates a new instance of klazz via ReflectionUtils, passing this collector's conf. */
+  private Object getInstance(Class<?> klazz) {
+    return ReflectionUtils.newInstance(klazz, conf);
+  }
+
+  /**
+   * Deep-copies obj by serializing it into outBuffer and deserializing those
+   * bytes into a new instance of the same class.
+   * @param obj the object to copy; may be null
+   * @return the copy, or null if obj is null
+   * @throws IOException if no serialization accepts obj's class or copying fails
+   */
+  private Object deepCopy(Object obj) throws IOException {
+    if (null == obj) {
+      return null;
+    }
+
+    Class klazz = obj.getClass();
+    if (null == serializationFactory.getSerialization(klazz)) {
+      // No configured serialization accepts this class; say so explicitly.
+      throw new IOException("No serialization found for " + klazz.getName());
+    }
+    Object out = getInstance(klazz); // the output object to return.
+    Serializer s = serializationFactory.getSerializer(klazz);
+    Deserializer ds = serializationFactory.getDeserializer(klazz);
+    try {
+      s.open(outBuffer);
+      ds.open(inBuffer);
+      outBuffer.reset(); // drop bytes left over from the previous copy.
+      s.serialize(obj);
+      inBuffer.reset(outBuffer.getData(), outBuffer.getLength());
+      return ds.deserialize(out);
+    } finally {
+      try {
+        s.close();
+      } catch (IOException ioe) {
+        // ignore this; we're closing.
+      }
+      try {
+        ds.close();
+      } catch (IOException ioe) {
+        // ignore this; we're closing.
+      }
+    }
   }
 
   /**
    * Accepts another (key, value) pair as an output of this mapper/reducer.
-   *
-   * BUG: Currently, this does not make deep copies of values passed to collect().
-   * So emitting the same Text object (for instance) repeatedly, with different
-   * internal string data each time, is not tested in the same way that Hadoop's
-   * OutputCollector works.
    */
   public void collect(K key, V value) throws IOException {
-    collectedOutputs.add(new Pair<K, V>(key, value));
+    collectedOutputs.add(new Pair<K, V>((K) deepCopy(key), (V) deepCopy(value))); // store copies so callers may reuse key/value
   }
 
   /**

Modified: hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/AllTests.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/AllTests.java?rev=794324&r1=794323&r2=794324&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/AllTests.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/AllTests.java Wed Jul 15 16:42:06 2009
@@ -18,6 +18,7 @@
 package org.apache.hadoop.mrunit;
 
 import org.apache.hadoop.mrunit.mock.TestMockReporter;
+import org.apache.hadoop.mrunit.mock.TestMockOutputCollector;
 
 import junit.framework.Test;
 import junit.framework.TestSuite;
@@ -36,6 +37,7 @@
     suite.addTestSuite(TestMapDriver.class);
     suite.addTestSuite(TestMapReduceDriver.class);
     suite.addTestSuite(TestMockReporter.class);
+    suite.addTestSuite(TestMockOutputCollector.class);
     suite.addTestSuite(TestReduceDriver.class);
     suite.addTestSuite(TestTestDriver.class);
     suite.addTestSuite(TestExample.class);

Added: hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/TestMockOutputCollector.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/TestMockOutputCollector.java?rev=794324&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/TestMockOutputCollector.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/mrunit/src/test/org/apache/hadoop/mrunit/mock/TestMockOutputCollector.java Wed Jul 15 16:42:06 2009
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mrunit.mock;
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mrunit.MapDriver;
+
+import org.junit.Test;
+
+
+/**
+ * Regression test for MAPREDUCE-680: a mapper that reuses its key and value
+ * objects across collect() calls must still produce distinct output pairs.
+ */
+public class TestMockOutputCollector extends TestCase {
+  /** A mapper that reuses the same key and val objects to emit multiple values. */
+  static class RepeatMapper extends MapReduceBase implements Mapper<Text, Text, Text, Text> {
+    public void map(Text k, Text v, OutputCollector<Text, Text> out, Reporter r)
+        throws IOException {
+      Text outKey = new Text();
+      Text outVal = new Text();
+      outKey.set("1");
+      outVal.set("a");
+      out.collect(outKey, outVal);
+
+      outKey.set("2");
+      outVal.set("b");
+      out.collect(outKey, outVal);
+
+      outKey.set("3");
+      outVal.set("c");
+      out.collect(outKey, outVal);
+    }
+  }
+
+  /** Runs RepeatMapper on one input and expects (1, a), (2, b) and (3, c). */
+  @Test
+  public void testRepeatedObjectUse() {
+    MapDriver<Text, Text, Text, Text> driver =
+        new MapDriver<Text, Text, Text, Text>(new RepeatMapper());
+    driver.withInput(new Text("inK"), new Text("inV"))
+          .withOutput(new Text("1"), new Text("a"))
+          .withOutput(new Text("2"), new Text("b"))
+          .withOutput(new Text("3"), new Text("c"))
+          .runTest();
+  }
+}
+