You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2015/04/01 20:07:34 UTC

[03/51] [partial] mahout git commit: MAHOUT-1655 Refactors mr-legacy into mahout-hdfs and mahout-mr, closes apache/mahout#86

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorTest.java b/mr/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorTest.java
new file mode 100644
index 0000000..dbb950a
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorTest.java
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.clustering.topdown.postprocessor;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.clustering.canopy.CanopyDriver;
+import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.topdown.PathDirectory;
+import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public final class ClusterOutputPostProcessorTest extends MahoutTestCase {
+
+  private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
+
+  private FileSystem fs;
+
+  private Path outputPath;
+
+  private Configuration conf;
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    Configuration conf = getConfiguration();
+    fs = FileSystem.get(conf);
+  }
+
+  private static List<VectorWritable> getPointsWritable(double[][] raw) {
+    List<VectorWritable> points = Lists.newArrayList();
+    for (double[] fr : raw) {
+      Vector vec = new RandomAccessSparseVector(fr.length);
+      vec.assign(fr);
+      points.add(new VectorWritable(vec));
+    }
+    return points;
+  }
+
+  /**
+   * Story: User wants to use cluster post processor after canopy clustering and then run clustering on the
+   * output clusters
+   */
+  @Test
+  public void testTopDownClustering() throws Exception {
+    List<VectorWritable> points = getPointsWritable(REFERENCE);
+
+    Path pointsPath = getTestTempDirPath("points");
+    conf = getConfiguration();
+    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file2"), fs, conf);
+
+    outputPath = getTestTempDirPath("output");
+
+    topLevelClustering(pointsPath, conf);
+
+    Map<String,Path> postProcessedClusterDirectories = ouputPostProcessing(conf);
+
+    assertPostProcessedOutput(postProcessedClusterDirectories);
+
+    bottomLevelClustering(postProcessedClusterDirectories);
+  }
+
+  private void assertTopLevelCluster(Entry<String,Path> cluster) {
+    String clusterId = cluster.getKey();
+    Path clusterPath = cluster.getValue();
+
+    try {
+      if ("0".equals(clusterId)) {
+        assertPointsInFirstTopLevelCluster(clusterPath);
+      } else if ("1".equals(clusterId)) {
+        assertPointsInSecondTopLevelCluster(clusterPath);
+      }
+    } catch (IOException e) {
+      Assert.fail("Exception occurred while asserting top level cluster.");
+    }
+
+  }
+
+  private void assertPointsInFirstTopLevelCluster(Path clusterPath) throws IOException {
+    List<Vector> vectorsInCluster = getVectorsInCluster(clusterPath);
+    for (Vector vector : vectorsInCluster) {
+      Assert.assertTrue(ArrayUtils.contains(new String[] {"{0:1.0,1:1.0}", "{0:2.0,1:1.0}", "{0:1.0,1:2.0}"},
+        vector.asFormatString()));
+    }
+  }
+
+  private void assertPointsInSecondTopLevelCluster(Path clusterPath) throws IOException {
+    List<Vector> vectorsInCluster = getVectorsInCluster(clusterPath);
+    for (Vector vector : vectorsInCluster) {
+      Assert.assertTrue(ArrayUtils.contains(new String[] {"{0:4.0,1:4.0}", "{0:5.0,1:4.0}", "{0:4.0,1:5.0}",
+                                                          "{0:5.0,1:5.0}"}, vector.asFormatString()));
+    }
+  }
+
+  private List<Vector> getVectorsInCluster(Path clusterPath) throws IOException {
+    Path[] partFilePaths = FileUtil.stat2Paths(fs.globStatus(clusterPath));
+    FileStatus[] listStatus = fs.listStatus(partFilePaths);
+    List<Vector> vectors = Lists.newArrayList();
+    for (FileStatus partFile : listStatus) {
+      SequenceFile.Reader topLevelClusterReader = new SequenceFile.Reader(fs, partFile.getPath(), conf);
+      Writable clusterIdAsKey = new LongWritable();
+      VectorWritable point = new VectorWritable();
+      while (topLevelClusterReader.next(clusterIdAsKey, point)) {
+        vectors.add(point.get());
+      }
+    }
+    return vectors;
+  }
+
+  private void bottomLevelClustering(Map<String,Path> postProcessedClusterDirectories) throws IOException,
+                                                                                      InterruptedException,
+                                                                                      ClassNotFoundException {
+    for (Entry<String,Path> topLevelCluster : postProcessedClusterDirectories.entrySet()) {
+      String clusterId = topLevelCluster.getKey();
+      Path topLevelclusterPath = topLevelCluster.getValue();
+
+      Path bottomLevelCluster = PathDirectory.getBottomLevelClusterPath(outputPath, clusterId);
+      CanopyDriver.run(conf, topLevelclusterPath, bottomLevelCluster, new ManhattanDistanceMeasure(), 2.1,
+        2.0, true, 0.0, true);
+      assertBottomLevelCluster(bottomLevelCluster);
+    }
+  }
+
+  private void assertBottomLevelCluster(Path bottomLevelCluster) {
+    Path clusteredPointsPath = new Path(bottomLevelCluster, "clusteredPoints");
+
+    DummyOutputCollector<IntWritable,WeightedVectorWritable> collector =
+        new DummyOutputCollector<IntWritable,WeightedVectorWritable>();
+
+    // The key is the clusterId, the value is the weighted vector
+    for (Pair<IntWritable,WeightedVectorWritable> record :
+         new SequenceFileIterable<IntWritable,WeightedVectorWritable>(new Path(clusteredPointsPath, "part-m-0"),
+                                                                      conf)) {
+      collector.collect(record.getFirst(), record.getSecond());
+    }
+    int clusterSize = collector.getKeys().size();
+    // First top level cluster produces two more clusters, second top level cluster is not broken again
+    assertTrue(clusterSize == 1 || clusterSize == 2);
+
+  }
+
+  private void assertPostProcessedOutput(Map<String,Path> postProcessedClusterDirectories) {
+    for (Entry<String,Path> cluster : postProcessedClusterDirectories.entrySet()) {
+      assertTopLevelCluster(cluster);
+    }
+  }
+
+  private Map<String,Path> ouputPostProcessing(Configuration conf) throws IOException {
+    ClusterOutputPostProcessor clusterOutputPostProcessor = new ClusterOutputPostProcessor(outputPath,
+        outputPath, conf);
+    clusterOutputPostProcessor.process();
+    return clusterOutputPostProcessor.getPostProcessedClusterDirectories();
+  }
+
+  private void topLevelClustering(Path pointsPath, Configuration conf) throws IOException,
+                                                                      InterruptedException,
+                                                                      ClassNotFoundException {
+    CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, true, 0.0, true);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/AbstractJobTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/AbstractJobTest.java b/mr/src/test/java/org/apache/mahout/common/AbstractJobTest.java
new file mode 100644
index 0000000..7683b57
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/AbstractJobTest.java
@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.common;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.junit.Test;
+
+public final class AbstractJobTest extends MahoutTestCase {
+  
+  interface AbstractJobFactory {
+    AbstractJob getJob();
+  }
+  
+  @Test
+  public void testFlag() throws Exception {
+    final Map<String,List<String>> testMap = Maps.newHashMap();
+    
+    AbstractJobFactory fact = new AbstractJobFactory() {
+      @Override
+      public AbstractJob getJob() {
+        return new AbstractJob() {
+          @Override
+          public int run(String[] args) throws IOException {
+            addFlag("testFlag", "t", "a simple test flag");
+            
+            Map<String,List<String>> argMap = parseArguments(args);
+            testMap.clear();
+            testMap.putAll(argMap);
+            return 1;
+          }
+        };
+      }
+    };
+    
+    // testFlag will only be present if specified on the command-line
+    
+    ToolRunner.run(fact.getJob(), new String[0]);
+    assertFalse("test map for absent flag", testMap.containsKey("--testFlag"));
+    
+    String[] withFlag = { "--testFlag" };
+    ToolRunner.run(fact.getJob(), withFlag);
+    assertTrue("test map for present flag", testMap.containsKey("--testFlag"));
+  }
+  
+  @Test
+  public void testOptions() throws Exception {
+    final Map<String,List<String>> testMap = Maps.newHashMap();
+    
+    AbstractJobFactory fact = new AbstractJobFactory() {
+      @Override
+      public AbstractJob getJob() {
+        return new AbstractJob() {
+          @Override
+          public int run(String[] args) throws IOException {
+            this.addOption(DefaultOptionCreator.overwriteOption().create());
+            this.addOption("option", "o", "option");
+            this.addOption("required", "r", "required", true /* required */);
+            this.addOption("notRequired", "nr", "not required", false /* not required */);
+            this.addOption("hasDefault", "hd", "option w/ default", "defaultValue");
+
+
+            Map<String,List<String>> argMap = parseArguments(args);
+            if (argMap == null) {
+              return -1;
+            }
+            
+            testMap.clear();
+            testMap.putAll(argMap);
+
+            return 0;
+          }
+        };
+      }
+    };
+
+    int ret = ToolRunner.run(fact.getJob(), new String[0]);
+    assertEquals("-1 for missing required options", -1, ret);
+    
+    ret = ToolRunner.run(fact.getJob(), new String[]{
+      "--required", "requiredArg"
+    });
+    assertEquals("0 for no missing required options", 0, ret);
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("defaultValue"), testMap.get("--hasDefault"));
+    assertNull(testMap.get("--option"));
+    assertNull(testMap.get("--notRequired"));
+    assertFalse(testMap.containsKey("--overwrite"));
+    
+    ret = ToolRunner.run(fact.getJob(), new String[]{
+      "--required", "requiredArg",
+      "--unknownArg"
+    });
+    assertEquals("-1 for including unknown options", -1, ret);
+
+    ret = ToolRunner.run(fact.getJob(), new String[]{
+      "--required", "requiredArg",
+      "--required", "requiredArg2",
+    });
+    assertEquals("-1 for including duplicate options", -1, ret);
+    
+    ret = ToolRunner.run(fact.getJob(), new String[]{
+      "--required", "requiredArg", 
+      "--overwrite",
+      "--hasDefault", "nonDefault",
+      "--option", "optionValue",
+      "--notRequired", "notRequired"
+    });
+    assertEquals("0 for no missing required options", 0, ret);
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("nonDefault"), testMap.get("--hasDefault"));
+    assertEquals(Collections.singletonList("optionValue"), testMap.get("--option"));
+    assertEquals(Collections.singletonList("notRequired"), testMap.get("--notRequired"));
+    assertTrue(testMap.containsKey("--overwrite"));
+    
+    ret = ToolRunner.run(fact.getJob(), new String[]{
+      "-r", "requiredArg", 
+      "-ow",
+      "-hd", "nonDefault",
+      "-o", "optionValue",
+      "-nr", "notRequired"
+    });
+    assertEquals("0 for no missing required options", 0, ret);
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("nonDefault"), testMap.get("--hasDefault"));
+    assertEquals(Collections.singletonList("optionValue"), testMap.get("--option"));
+    assertEquals(Collections.singletonList("notRequired"), testMap.get("--notRequired"));
+    assertTrue(testMap.containsKey("--overwrite"));
+    
+  }
+  
+  @Test
+  public void testInputOutputPaths() throws Exception {
+    
+    AbstractJobFactory fact = new AbstractJobFactory() {
+      @Override
+      public AbstractJob getJob() {
+        return new AbstractJob() {
+          @Override
+          public int run(String[] args) throws IOException {
+            addInputOption();
+            addOutputOption();
+            
+            // arg map should be null if a required option is missing.
+            Map<String, List<String>> argMap = parseArguments(args);
+            
+            if (argMap == null) {
+              return -1;
+            }
+            
+            Path inputPath = getInputPath();
+            assertNotNull("getInputPath() returns non-null", inputPath);
+            
+            Path outputPath = getInputPath();
+            assertNotNull("getOutputPath() returns non-null", outputPath);
+            return 0;
+          }
+        };
+      }
+    };
+
+    int ret = ToolRunner.run(fact.getJob(), new String[0]);
+    assertEquals("-1 for missing input option", -1, ret);
+    
+    String testInputPath = "testInputPath";
+
+    AbstractJob job = fact.getJob();
+    ret = ToolRunner.run(job, new String[]{
+        "--input", testInputPath });
+    assertEquals("-1 for missing output option", -1, ret);
+    assertEquals("input path is correct", testInputPath, job.getInputPath().toString());
+    
+    job = fact.getJob();
+    String testOutputPath = "testOutputPath";
+    ret = ToolRunner.run(job, new String[]{
+        "--output", testOutputPath });
+    assertEquals("-1 for missing input option", -1, ret);
+    assertEquals("output path is correct", testOutputPath, job.getOutputPath().toString());
+    
+    job = fact.getJob();
+    ret = ToolRunner.run(job, new String[]{
+        "--input", testInputPath, "--output", testOutputPath });
+    assertEquals("0 for complete options", 0, ret);
+    assertEquals("input path is correct", testInputPath, job.getInputPath().toString());
+    assertEquals("output path is correct", testOutputPath, job.getOutputPath().toString());
+    
+    job = fact.getJob();
+    ret = ToolRunner.run(job, new String[]{ 
+        "--input", testInputPath, "--output", testOutputPath });
+    assertEquals("0 for complete options", 0, ret);
+    assertEquals("input path is correct", testInputPath, job.getInputPath().toString());
+    assertEquals("output path is correct", testOutputPath, job.getOutputPath().toString());
+    
+    job = fact.getJob();
+    String testInputPropertyPath = "testInputPropertyPath";
+    String testOutputPropertyPath = "testOutputPropertyPath";
+    ret = ToolRunner.run(job, new String[]{
+        "-Dmapred.input.dir=" + testInputPropertyPath, 
+        "-Dmapred.output.dir=" + testOutputPropertyPath });
+    assertEquals("0 for complete options", 0, ret);
+    assertEquals("input path from property is correct", testInputPropertyPath, job.getInputPath().toString());
+    assertEquals("output path from property is correct", testOutputPropertyPath, job.getOutputPath().toString());
+    
+    job = fact.getJob();
+    ret = ToolRunner.run(job, new String[]{ 
+        "-Dmapred.input.dir=" + testInputPropertyPath,
+        "-Dmapred.output.dir=" + testOutputPropertyPath,
+        "--input", testInputPath,
+        "--output", testOutputPath });
+    assertEquals("0 for complete options", 0, ret);
+    assertEquals("input command-line option precedes property",
+        testInputPath, job.getInputPath().toString());
+    assertEquals("output command-line option precedes property",
+        testOutputPath, job.getOutputPath().toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/DistributedCacheFileLocationTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/DistributedCacheFileLocationTest.java b/mr/src/test/java/org/apache/mahout/common/DistributedCacheFileLocationTest.java
new file mode 100644
index 0000000..5d3532c
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/DistributedCacheFileLocationTest.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+import java.io.File;
+import java.net.URI;
+
+
+public class DistributedCacheFileLocationTest extends MahoutTestCase {
+
+  static final File FILE_I_WANT_TO_FIND = new File("file/i_want_to_find.txt");
+  static final URI[] DISTRIBUTED_CACHE_FILES = new URI[] {
+      new File("/first/file").toURI(), new File("/second/file").toURI(), FILE_I_WANT_TO_FIND.toURI() };
+
+  @Test
+  public void nonExistingFile() {
+    Path path = HadoopUtil.findInCacheByPartOfFilename("no such file", DISTRIBUTED_CACHE_FILES);
+    assertNull(path);
+  }
+
+  @Test
+  public void existingFile() {
+    Path path = HadoopUtil.findInCacheByPartOfFilename("want_to_find", DISTRIBUTED_CACHE_FILES);
+    assertNotNull(path);
+    assertEquals(FILE_I_WANT_TO_FIND.getName(), path.getName());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/DummyOutputCollector.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/DummyOutputCollector.java b/mr/src/test/java/org/apache/mahout/common/DummyOutputCollector.java
new file mode 100644
index 0000000..6951f5a
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/DummyOutputCollector.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.OutputCollector;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+public final class DummyOutputCollector<K extends WritableComparable, V extends Writable>
+    implements OutputCollector<K,V> {
+
+  private final Map<K, List<V>> data = new TreeMap<K,List<V>>();
+
+  @Override
+  public void collect(K key,V values) {
+    List<V> points = data.get(key);
+    if (points == null) {
+      points = Lists.newArrayList();
+      data.put(key, points);
+    }
+    points.add(values);
+  }
+
+  public Map<K,List<V>> getData() {
+    return data;
+  }
+
+  public List<V> getValue(K key) {
+    return data.get(key);
+  }
+
+  public Set<K> getKeys() {
+    return data.keySet();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/DummyRecordWriter.java b/mr/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
new file mode 100644
index 0000000..7dea174
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.MapContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.ReduceContext;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public final class DummyRecordWriter<K extends Writable, V extends Writable> extends RecordWriter<K, V> {
+
+  private final List<K> keysInInsertionOrder = Lists.newArrayList();
+  private final Map<K, List<V>> data = Maps.newHashMap();
+
+  @Override
+  public void write(K key, V value) {
+
+    // if the user reuses the same writable class, we need to create a new one
+    // otherwise the Map content will be modified after the insert
+    try {
+
+      K keyToUse = key instanceof NullWritable ? key : (K) cloneWritable(key);
+      V valueToUse = (V) cloneWritable(value);
+
+      keysInInsertionOrder.add(keyToUse);
+
+      List<V> points = data.get(key);
+      if (points == null) {
+        points = Lists.newArrayList();
+        data.put(keyToUse, points);
+      }
+      points.add(valueToUse);
+
+    } catch (IOException e) {
+      throw new RuntimeException(e.getMessage(), e);
+    }
+  }
+
+  private Writable cloneWritable(Writable original) throws IOException {
+
+    Writable clone;
+    try {
+      clone = original.getClass().asSubclass(Writable.class).newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException("Unable to instantiate writable!", e);
+    }
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+
+    original.write(new DataOutputStream(bytes));
+    clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
+
+    return clone;
+  }
+
+  @Override
+  public void close(TaskAttemptContext context) {
+  }
+
+  public Map<K, List<V>> getData() {
+    return data;
+  }
+
+  public List<V> getValue(K key) {
+    return data.get(key);
+  }
+
+  public Set<K> getKeys() {
+    return data.keySet();
+  }
+
+  public Iterable<K> getKeysInInsertionOrder() {
+    return keysInInsertionOrder;
+  }
+
+  public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
+                                                                      Configuration configuration,
+                                                                      RecordWriter<K2, V2> output) {
+
+    // Use reflection since the context types changed incompatibly between 0.20
+    // and 0.23.
+    try {
+      return buildNewMapperContext(configuration, output);
+    } catch (Exception e) {
+      try {
+        return buildOldMapperContext(mapper, configuration, output);
+      } catch (Exception ex) {
+        throw new IllegalStateException(ex);
+      }
+    }
+  }
+
+  public static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2> reducer,
+                                                                       Configuration configuration,
+                                                                       RecordWriter<K2, V2> output,
+                                                                       Class<K1> keyClass,
+                                                                       Class<V1> valueClass) {
+
+    // Use reflection since the context types changed incompatibly between 0.20
+    // and 0.23.
+    try {
+      return buildNewReducerContext(configuration, output, keyClass, valueClass);
+    } catch (Exception e) {
+      try {
+        return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
+      } catch (Exception ex) {
+        throw new IllegalStateException(ex);
+      }
+    }
+  }
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
+    Configuration configuration, RecordWriter<K2, V2> output) throws Exception {
+    Class<?> mapContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl");
+    Constructor<?> cons = mapContextImplClass.getConstructors()[0];
+    Object mapContextImpl = cons.newInstance(configuration,
+      new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+
+    Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+    Object wrappedMapper = wrappedMapperClass.getConstructor().newInstance();
+    Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+    return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
+  }
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
+    Mapper<K1, V1, K2, V2> mapper, Configuration configuration,
+    RecordWriter<K2, V2> output) throws Exception {
+    Constructor<?> cons = getNestedContextConstructor(mapper.getClass());
+    // first argument to the constructor is the enclosing instance
+    return (Mapper.Context) cons.newInstance(mapper, configuration,
+      new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+  }
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
+    Configuration configuration, RecordWriter<K2, V2> output, Class<K1> keyClass,
+    Class<V1> valueClass) throws Exception {
+    Class<?> reduceContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl");
+    Constructor<?> cons = reduceContextImplClass.getConstructors()[0];
+    Object reduceContextImpl = cons.newInstance(configuration,
+      new TaskAttemptID(),
+      new MockIterator(),
+      null,
+      null,
+      output,
+      null,
+      new DummyStatusReporter(),
+      null,
+      keyClass,
+      valueClass);
+
+    Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
+    Object wrappedReducer = wrappedReducerClass.getConstructor().newInstance();
+    Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
+    return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
+  }
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
+    Reducer<K1, V1, K2, V2> reducer, Configuration configuration,
+    RecordWriter<K2, V2> output, Class<K1> keyClass,
+    Class<V1> valueClass) throws Exception {
+    Constructor<?> cons = getNestedContextConstructor(reducer.getClass());
+    // first argument to the constructor is the enclosing instance
+    return (Reducer.Context) cons.newInstance(reducer,
+      configuration,
+      new TaskAttemptID(),
+      new MockIterator(),
+      null,
+      null,
+      output,
+      null,
+      new DummyStatusReporter(),
+      null,
+      keyClass,
+      valueClass);
+  }
+
+  private static Constructor<?> getNestedContextConstructor(Class<?> outerClass) {
+    for (Class<?> nestedClass : outerClass.getClasses()) {
+      if ("Context".equals(nestedClass.getSimpleName())) {
+        return nestedClass.getConstructors()[0];
+      }
+    }
+    throw new IllegalStateException("Cannot find context class for " + outerClass);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/DummyRecordWriterTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/DummyRecordWriterTest.java b/mr/src/test/java/org/apache/mahout/common/DummyRecordWriterTest.java
new file mode 100644
index 0000000..6b25448
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/DummyRecordWriterTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.VectorWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DummyRecordWriterTest {
+
+  @Test
+  public void testWrite() {
+    DummyRecordWriter<IntWritable, VectorWritable> writer = 
+        new DummyRecordWriter<IntWritable, VectorWritable>();
+    IntWritable reusableIntWritable = new IntWritable();
+    VectorWritable reusableVectorWritable = new VectorWritable();
+    reusableIntWritable.set(0);
+    reusableVectorWritable.set(new DenseVector(new double[] { 1, 2, 3 }));
+    writer.write(reusableIntWritable, reusableVectorWritable);
+    reusableIntWritable.set(1);
+    reusableVectorWritable.set(new DenseVector(new double[] { 4, 5, 6 }));
+    writer.write(reusableIntWritable, reusableVectorWritable);
+
+    Assert.assertEquals(
+        "The writer must remember the two keys that is written to it", 2,
+        writer.getKeys().size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/DummyStatusReporter.java b/mr/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
new file mode 100644
index 0000000..c6bc34b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.common;
+
+import org.easymock.EasyMock;
+
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+
+public final class DummyStatusReporter extends StatusReporter {
+
+  private final Map<Enum<?>, Counter> counters = Maps.newHashMap();
+  private final Map<String, Counter> counterGroups = Maps.newHashMap();
+
+  private static Counter newCounter() {
+    try {
+      // 0.23 case
+      String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
+      return (Counter) EasyMock.createMockBuilder(Class.forName(c)).createMock();
+    } catch (ClassNotFoundException e) {
+      // 0.20 case
+      return EasyMock.createMockBuilder(Counter.class).createMock();
+    }
+  }
+
+  @Override
+  public Counter getCounter(Enum<?> name) {
+    if (!counters.containsKey(name)) {
+      counters.put(name, newCounter());
+    }
+    return counters.get(name);
+  }
+
+
+  @Override
+  public Counter getCounter(String group, String name) {
+    if (!counterGroups.containsKey(group + name)) {
+      counterGroups.put(group + name, newCounter());
+    }
+    return counterGroups.get(group+name);
+  }
+
+  @Override
+  public void progress() {
+  }
+
+  @Override
+  public void setStatus(String status) {
+  }
+
+  @Override
+  public float getProgress() {
+    return 0.0f;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/IntPairWritableTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/IntPairWritableTest.java b/mr/src/test/java/org/apache/mahout/common/IntPairWritableTest.java
new file mode 100644
index 0000000..ceffe3e
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/IntPairWritableTest.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.util.Arrays;
+
+import org.junit.Test;
+
+public final class IntPairWritableTest extends MahoutTestCase {
+
+  @Test
+  public void testGetSet() {
+    IntPairWritable n = new IntPairWritable();
+    
+    assertEquals(0, n.getFirst());
+    assertEquals(0, n.getSecond());
+    
+    n.setFirst(5);
+    n.setSecond(10);
+    
+    assertEquals(5, n.getFirst());
+    assertEquals(10, n.getSecond());
+    
+    n = new IntPairWritable(2,4);
+    
+    assertEquals(2, n.getFirst());
+    assertEquals(4, n.getSecond());
+  }
+  
+  @Test
+  public void testWritable() throws Exception {
+    IntPairWritable one = new IntPairWritable(1,2);
+    IntPairWritable two = new IntPairWritable(3,4);
+    
+    assertEquals(1, one.getFirst());
+    assertEquals(2, one.getSecond());
+    
+    assertEquals(3, two.getFirst());
+    assertEquals(4, two.getSecond());
+    
+    
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    DataOutput out = new DataOutputStream(bout);
+    
+    two.write(out);
+    
+    byte[] b = bout.toByteArray();
+    
+    ByteArrayInputStream bin = new ByteArrayInputStream(b);
+    DataInput din = new DataInputStream(bin);
+    
+    one.readFields(din);
+    
+    assertEquals(two.getFirst(), one.getFirst());
+    assertEquals(two.getSecond(), one.getSecond());    
+  }
+  
+  @Test
+  public void testComparable() {
+    IntPairWritable[] input = {
+        new IntPairWritable(2,3),
+        new IntPairWritable(2,2),
+        new IntPairWritable(1,3),
+        new IntPairWritable(1,2),
+        new IntPairWritable(2,1),
+        new IntPairWritable(2,2),
+        new IntPairWritable(1,-2),
+        new IntPairWritable(1,-1),
+        new IntPairWritable(-2,-2),
+        new IntPairWritable(-2,-1),
+        new IntPairWritable(-1,-1),
+        new IntPairWritable(-1,-2),
+        new IntPairWritable(Integer.MAX_VALUE,1),
+        new IntPairWritable(Integer.MAX_VALUE/2,1),
+        new IntPairWritable(Integer.MIN_VALUE,1),
+        new IntPairWritable(Integer.MIN_VALUE/2,1)
+        
+    };
+    
+    IntPairWritable[] sorted = new IntPairWritable[input.length];
+    System.arraycopy(input, 0, sorted, 0, input.length);
+    Arrays.sort(sorted);
+    
+    int[] expected = {
+        14, 15, 8, 9, 11, 10, 6, 7, 3, 2, 4, 1, 5, 0, 13, 12
+    };
+    
+    for (int i=0; i < input.length; i++) {
+      assertSame(input[expected[i]], sorted[i]);
+    }
+ 
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/MahoutTestCase.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/MahoutTestCase.java b/mr/src/test/java/org/apache/mahout/common/MahoutTestCase.java
new file mode 100644
index 0000000..775c8d8
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/MahoutTestCase.java
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.lang.reflect.Field;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Closeables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.After;
+import org.junit.Before;
+
+public class MahoutTestCase extends org.apache.mahout.math.MahoutTestCase {
+
+  /** "Close enough" value for floating-point comparisons. */
+  public static final double EPSILON = 0.000001;
+
+  private Path testTempDirPath;
+  private FileSystem fs;
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    RandomUtils.useTestSeed();
+    testTempDirPath = null;
+    fs = null;
+  }
+
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    if (testTempDirPath != null) {
+      try {
+        fs.delete(testTempDirPath, true);
+      } catch (IOException e) {
+        throw new IllegalStateException("Test file not found");
+      }
+      testTempDirPath = null;
+      fs = null;
+    }
+    super.tearDown();
+  }
+
+  public final Configuration getConfiguration() throws IOException {
+    Configuration conf = new Configuration();
+    conf.set("hadoop.tmp.dir", getTestTempDir("hadoop" + Math.random()).getAbsolutePath());
+    return conf;
+  }
+
+  protected final Path getTestTempDirPath() throws IOException {
+    if (testTempDirPath == null) {
+      fs = FileSystem.get(getConfiguration());
+      long simpleRandomLong = (long) (Long.MAX_VALUE * Math.random());
+      testTempDirPath = fs.makeQualified(
+          new Path("/tmp/mahout-" + getClass().getSimpleName() + '-' + simpleRandomLong));
+      if (!fs.mkdirs(testTempDirPath)) {
+        throw new IOException("Could not create " + testTempDirPath);
+      }
+      fs.deleteOnExit(testTempDirPath);
+    }
+    return testTempDirPath;
+  }
+
+  protected final Path getTestTempFilePath(String name) throws IOException {
+    return getTestTempFileOrDirPath(name, false);
+  }
+
+  protected final Path getTestTempDirPath(String name) throws IOException {
+    return getTestTempFileOrDirPath(name, true);
+  }
+
+  private Path getTestTempFileOrDirPath(String name, boolean dir) throws IOException {
+    Path testTempDirPath = getTestTempDirPath();
+    Path tempFileOrDir = fs.makeQualified(new Path(testTempDirPath, name));
+    fs.deleteOnExit(tempFileOrDir);
+    if (dir && !fs.mkdirs(tempFileOrDir)) {
+      throw new IOException("Could not create " + tempFileOrDir);
+    }
+    return tempFileOrDir;
+  }
+
+  /**
+   * Try to directly set a (possibly private) field on an Object
+   */
+  protected static void setField(Object target, String fieldname, Object value)
+    throws NoSuchFieldException, IllegalAccessException {
+    Field field = findDeclaredField(target.getClass(), fieldname);
+    field.setAccessible(true);
+    field.set(target, value);
+  }
+
+  /**
+   * Find a declared field in a class or one of it's super classes
+   */
+  private static Field findDeclaredField(Class<?> inClass, String fieldname) throws NoSuchFieldException {
+    while (!Object.class.equals(inClass)) {
+      for (Field field : inClass.getDeclaredFields()) {
+        if (field.getName().equalsIgnoreCase(fieldname)) {
+          return field;
+        }
+      }
+      inClass = inClass.getSuperclass();
+    }
+    throw new NoSuchFieldException();
+  }
+
+  /**
+   * @return a job option key string (--name) from the given option name
+   */
+  protected static String optKey(String optionName) {
+    return AbstractJob.keyFor(optionName);
+  }
+
+  protected static void writeLines(File file, String... lines) throws IOException {
+    Writer writer = new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8);
+    try {
+      for (String line : lines) {
+        writer.write(line);
+        writer.write('\n');
+      }
+    } finally {
+      Closeables.close(writer, false);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/MockIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/MockIterator.java b/mr/src/test/java/org/apache/mahout/common/MockIterator.java
new file mode 100644
index 0000000..ce48fdc
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/MockIterator.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
+import org.apache.hadoop.util.Progress;
+
+public final class MockIterator implements RawKeyValueIterator {
+
+  @Override
+  public void close() {
+  }
+
+  @Override
+  public DataInputBuffer getKey() {
+    return null;
+  }
+
+  @Override
+  public Progress getProgress() {
+    return null;
+  }
+
+  @Override
+  public DataInputBuffer getValue() {
+
+    return null;
+  }
+
+  @Override
+  public boolean next() {
+    return true;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/StringUtilsTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/StringUtilsTest.java b/mr/src/test/java/org/apache/mahout/common/StringUtilsTest.java
new file mode 100644
index 0000000..0633685
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/StringUtilsTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common;
+
+import com.google.common.collect.Lists;
+import org.junit.Test;
+
+import java.util.List;
+
+public final class StringUtilsTest extends MahoutTestCase {
+
+  private static class DummyTest {
+    private int field;
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (!(obj instanceof DummyTest)) {
+        return false;
+      }
+
+      DummyTest dt = (DummyTest) obj;
+      return field == dt.field;
+    }
+
+    @Override
+    public int hashCode() {
+      return field;
+    }
+
+    public int getField() {
+      return field;
+    }
+  }
+
+  @Test
+  public void testStringConversion() throws Exception {
+
+    List<String> expected = Lists.newArrayList("A", "B", "C");
+    assertEquals(expected, StringUtils.fromString(StringUtils
+        .toString(expected)));
+
+    // test a non serializable object
+    DummyTest test = new DummyTest();
+    assertEquals(test, StringUtils.fromString(StringUtils.toString(test)));
+  }
+
+  @Test
+  public void testEscape() throws Exception {
+    String res = StringUtils.escapeXML("\",\',&,>,<");
+    assertEquals("_,_,_,_,_", res);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/CosineDistanceMeasureTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/CosineDistanceMeasureTest.java b/mr/src/test/java/org/apache/mahout/common/distance/CosineDistanceMeasureTest.java
new file mode 100644
index 0000000..6db7c9b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/CosineDistanceMeasureTest.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class CosineDistanceMeasureTest extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure distanceMeasure = new CosineDistanceMeasure();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+    };
+
+    double[][] distanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        distanceMatrix[a][b] = distanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+
+    assertEquals(0.0, distanceMatrix[0][0], EPSILON);
+    assertTrue(distanceMatrix[0][0] < distanceMatrix[0][1]);
+    assertTrue(distanceMatrix[0][1] < distanceMatrix[0][2]);
+
+    assertEquals(0.0, distanceMatrix[1][1], EPSILON);
+    assertTrue(distanceMatrix[1][0] > distanceMatrix[1][1]);
+    assertTrue(distanceMatrix[1][2] < distanceMatrix[1][0]);
+
+    assertEquals(0.0, distanceMatrix[2][2], EPSILON);
+    assertTrue(distanceMatrix[2][0] > distanceMatrix[2][1]);
+    assertTrue(distanceMatrix[2][1] > distanceMatrix[2][2]);
+
+    // Two equal vectors (despite them being zero) should have 0 distance.
+    assertEquals(0, 
+                 distanceMeasure.distance(new SequentialAccessSparseVector(1),
+                                          new SequentialAccessSparseVector(1)), 
+                 EPSILON);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java b/mr/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
new file mode 100644
index 0000000..ad1608c
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public abstract class DefaultDistanceMeasureTest extends MahoutTestCase {
+
+  protected abstract DistanceMeasure distanceMeasureFactory();
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure distanceMeasure = distanceMeasureFactory();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1}),
+        new DenseVector(new double[]{2, 2, 2, 2, 2, 2}),
+        new DenseVector(new double[]{6, 6, 6, 6, 6, 6}),
+        new DenseVector(new double[]{-1,-1,-1,-1,-1,-1})
+    };
+
+    compare(distanceMeasure, vectors);
+
+    vectors = new Vector[4];
+    
+    vectors[0] = new RandomAccessSparseVector(5);
+    vectors[0].setQuick(0, 1);
+    vectors[0].setQuick(3, 1);
+    vectors[0].setQuick(4, 1);
+
+    vectors[1] = new RandomAccessSparseVector(5);
+    vectors[1].setQuick(0, 2);
+    vectors[1].setQuick(3, 2);
+    vectors[1].setQuick(4, 2);
+
+    vectors[2] = new RandomAccessSparseVector(5);
+    vectors[2].setQuick(0, 6);
+    vectors[2].setQuick(3, 6);
+    vectors[2].setQuick(4, 6);
+    
+    vectors[3] = new RandomAccessSparseVector(5);
+
+    compare(distanceMeasure, vectors);
+  }
+
+  private static void compare(DistanceMeasure distanceMeasure, Vector[] vectors) {
+     double[][] distanceMatrix = new double[4][4];
+
+     for (int a = 0; a < 4; a++) {
+       for (int b = 0; b < 4; b++) {
+        distanceMatrix[a][b] = distanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+
+    assertEquals("Distance from first vector to itself is not zero", 0.0, distanceMatrix[0][0], EPSILON);
+    assertTrue(distanceMatrix[0][0] < distanceMatrix[0][1]);
+    assertTrue(distanceMatrix[0][1] < distanceMatrix[0][2]);
+
+    assertEquals("Distance from second vector to itself is not zero", 0.0, distanceMatrix[1][1], EPSILON);
+    assertTrue(distanceMatrix[1][0] > distanceMatrix[1][1]);
+    assertTrue(distanceMatrix[1][2] > distanceMatrix[1][0]);
+
+    assertEquals("Distance from third vector to itself is not zero", 0.0, distanceMatrix[2][2], EPSILON);
+    assertTrue(distanceMatrix[2][0] > distanceMatrix[2][1]);
+    assertTrue(distanceMatrix[2][1] > distanceMatrix[2][2]);
+
+    for (int a = 0; a < 4; a++) {
+      for (int b = 0; b < 4; b++) {
+        assertTrue("Distance between vectors less than zero: " 
+                   + distanceMatrix[a][b] + " = " + distanceMeasure 
+                   + ".distance("+ vectors[a].asFormatString() + ", " 
+                   + vectors[b].asFormatString() + ')',
+                   distanceMatrix[a][b] >= 0);
+        if (vectors[a].plus(vectors[b]).norm(2) == 0 && vectors[a].norm(2) > 0) {
+          assertTrue("Distance from v to -v is equal to zero" 
+                     + vectors[a].asFormatString() + " = -" + vectors[b].asFormatString(), 
+                     distanceMatrix[a][b] > 0);
+        }
+      }
+    }
+          
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/DefaultWeightedDistanceMeasureTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/DefaultWeightedDistanceMeasureTest.java b/mr/src/test/java/org/apache/mahout/common/distance/DefaultWeightedDistanceMeasureTest.java
new file mode 100644
index 0000000..a8f1d0b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/DefaultWeightedDistanceMeasureTest.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public abstract class DefaultWeightedDistanceMeasureTest extends DefaultDistanceMeasureTest {
+
+  @Override
+  public abstract WeightedDistanceMeasure distanceMeasureFactory();
+
+  @Test
+  public void testMeasureWeighted() {
+
+    WeightedDistanceMeasure distanceMeasure = distanceMeasureFactory();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{9, 9, 1}),
+        new DenseVector(new double[]{1, 9, 9}),
+        new DenseVector(new double[]{9, 1, 9}),
+    };
+    distanceMeasure.setWeights(new DenseVector(new double[]{1, 1000, 1}));
+
+    double[][] distanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        distanceMatrix[a][b] = distanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+
+    assertEquals(0.0, distanceMatrix[0][0], EPSILON);
+    assertTrue(distanceMatrix[0][1] < distanceMatrix[0][2]);
+
+
+  }
+
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
new file mode 100644
index 0000000..185adf3
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestChebyshevMeasure extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure chebyshevDistanceMeasure = new ChebyshevDistanceMeasure();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+    };
+    double[][] distances = {{0.0, 1.0, 1.0}, {1.0, 0.0, 1.0}, {1.0, 1.0, 0.0}};
+    
+    double[][] chebyshevDistanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        chebyshevDistanceMatrix[a][b] = chebyshevDistanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        assertEquals(distances[a][b], chebyshevDistanceMatrix[a][b], EPSILON);
+      }
+    }
+
+    assertEquals(0.0, chebyshevDistanceMatrix[0][0], EPSILON);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestEuclideanDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestEuclideanDistanceMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestEuclideanDistanceMeasure.java
new file mode 100644
index 0000000..cc9e9e7
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestEuclideanDistanceMeasure.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+public final class TestEuclideanDistanceMeasure extends DefaultDistanceMeasureTest {
+
+  @Override
+  public DistanceMeasure distanceMeasureFactory() {
+    return new EuclideanDistanceMeasure();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java
new file mode 100644
index 0000000..8e3d205
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+
+/**
+ * To launch this test only : mvn test -Dtest=org.apache.mahout.common.distance.TestMahalanobisDistanceMeasure
+ */
+public final class TestMahalanobisDistanceMeasure extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+    double[][] invCovValues = { { 2.2, 0.4 }, { 0.4, 2.8 } };
+    double[] meanValues = { -2.3, -0.9 };
+    Matrix invCov = new DenseMatrix(invCovValues);
+    Vector meanVector = new DenseVector(meanValues);
+    MahalanobisDistanceMeasure distanceMeasure = new MahalanobisDistanceMeasure();
+    distanceMeasure.setInverseCovarianceMatrix(invCov);
+    distanceMeasure.setMeanVector(meanVector);
+    double[] v1 = { -1.9, -2.3 };
+    double[] v2 = { -2.9, -1.3 };
+    double dist = distanceMeasure.distance(new DenseVector(v1),new DenseVector(v2));
+    assertEquals(2.0493901531919194, dist, EPSILON);
+    //now set the covariance Matrix
+    distanceMeasure.setCovarianceMatrix(invCov);
+    //check the inverse covariance times covariance equals identity 
+    Matrix identity = distanceMeasure.getInverseCovarianceMatrix().times(invCov);
+    assertEquals(1, identity.get(0,0), EPSILON);
+    assertEquals(1, identity.get(1,1), EPSILON);
+    assertEquals(0, identity.get(1,0), EPSILON);
+    assertEquals(0, identity.get(0,1), EPSILON);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestManhattanDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestManhattanDistanceMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestManhattanDistanceMeasure.java
new file mode 100644
index 0000000..97a5612
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestManhattanDistanceMeasure.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+public final class TestManhattanDistanceMeasure extends DefaultDistanceMeasureTest {
+
+  @Override
+  public DistanceMeasure distanceMeasureFactory() {
+    return new ManhattanDistanceMeasure();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
new file mode 100644
index 0000000..d2cd85e
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestMinkowskiMeasure extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure minkowskiDistanceMeasure = new MinkowskiDistanceMeasure(1.5);
+    DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
+    DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+    };
+
+    double[][] minkowskiDistanceMatrix = new double[3][3];
+    double[][] manhattanDistanceMatrix = new double[3][3];
+    double[][] euclideanDistanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        minkowskiDistanceMatrix[a][b] = minkowskiDistanceMeasure.distance(vectors[a], vectors[b]);
+        manhattanDistanceMatrix[a][b] = manhattanDistanceMeasure.distance(vectors[a], vectors[b]);
+        euclideanDistanceMatrix[a][b] = euclideanDistanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        assertTrue(minkowskiDistanceMatrix[a][b] <= manhattanDistanceMatrix[a][b]);
+        assertTrue(minkowskiDistanceMatrix[a][b] >= euclideanDistanceMatrix[a][b]);
+      }
+    }
+
+    assertEquals(0.0, minkowskiDistanceMatrix[0][0], EPSILON);
+    assertTrue(minkowskiDistanceMatrix[0][0] < minkowskiDistanceMatrix[0][1]);
+    assertTrue(minkowskiDistanceMatrix[0][1] < minkowskiDistanceMatrix[0][2]);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestTanimotoDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestTanimotoDistanceMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestTanimotoDistanceMeasure.java
new file mode 100644
index 0000000..01f9134
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestTanimotoDistanceMeasure.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+public final class TestTanimotoDistanceMeasure extends DefaultWeightedDistanceMeasureTest {
+  @Override
+  public TanimotoDistanceMeasure distanceMeasureFactory() {
+    return new TanimotoDistanceMeasure();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedEuclideanDistanceMeasureTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedEuclideanDistanceMeasureTest.java b/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedEuclideanDistanceMeasureTest.java
new file mode 100644
index 0000000..b99d165
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedEuclideanDistanceMeasureTest.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+public final class TestWeightedEuclideanDistanceMeasureTest extends DefaultWeightedDistanceMeasureTest {
+  @Override
+  public WeightedDistanceMeasure distanceMeasureFactory() {
+    return new WeightedEuclideanDistanceMeasure();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedManhattanDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedManhattanDistanceMeasure.java b/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedManhattanDistanceMeasure.java
new file mode 100644
index 0000000..77d4a01
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/distance/TestWeightedManhattanDistanceMeasure.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+public final class TestWeightedManhattanDistanceMeasure extends DefaultWeightedDistanceMeasureTest {
+
+  @Override
+  public WeightedManhattanDistanceMeasure distanceMeasureFactory() {
+    return new WeightedManhattanDistanceMeasure();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java b/mr/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java
new file mode 100644
index 0000000..d38178c
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.iterator;
+
+import java.util.Iterator;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Test;
+
+public final class CountingIteratorTest extends MahoutTestCase {
+
+  @Test
+  public void testEmptyCase() {
+    assertFalse(new CountingIterator(0).hasNext());
+  }
+
+  @Test
+  public void testCount() {
+    Iterator<Integer> it = new CountingIterator(3);
+    assertTrue(it.hasNext());
+    assertEquals(0, (int) it.next());
+    assertTrue(it.hasNext());
+    assertEquals(1, (int) it.next());
+    assertTrue(it.hasNext());
+    assertEquals(2, (int) it.next());
+    assertFalse(it.hasNext());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java b/mr/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java
new file mode 100644
index 0000000..b67d34b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.iterator;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Test;
+
+public abstract class SamplerCase extends MahoutTestCase {
+  // these provide access to the underlying implementation
+  protected abstract Iterator<Integer> createSampler(int n, Iterator<Integer> source);
+
+  protected abstract boolean isSorted();
+
+  @Test
+  public void testEmptyCase() {
+    assertFalse(createSampler(100, new CountingIterator(0)).hasNext());
+  }
+
+  @Test
+  public void testSmallInput() {
+    Iterator<Integer> t = createSampler(10, new CountingIterator(1));
+    assertTrue(t.hasNext());
+    assertEquals(0, t.next().intValue());
+    assertFalse(t.hasNext());
+
+    t = createSampler(10, new CountingIterator(1));
+    assertTrue(t.hasNext());
+    assertEquals(0, t.next().intValue());
+    assertFalse(t.hasNext());
+  }
+
+  @Test
+  public void testAbsurdSize() {
+    Iterator<Integer> t = createSampler(0, new CountingIterator(2));
+    assertFalse(t.hasNext());
+  }
+
+  @Test
+  public void testExactSizeMatch() {
+    Iterator<Integer> t = createSampler(10, new CountingIterator(10));
+    for (int i = 0; i < 10; i++) {
+      assertTrue(t.hasNext());
+      assertEquals(i, t.next().intValue());
+    }
+    assertFalse(t.hasNext());
+  }
+
+  @Test
+  public void testSample() {
+    Iterator<Integer> source = new CountingIterator(100);
+    Iterator<Integer> t = createSampler(15, source);
+
+    // this is just a regression test, not a real test
+    List<Integer> expectedValues = Arrays.asList(52,28,2,60,50,32,65,79,78,9,40,33,96,25,48);
+    if (isSorted()) {
+      Collections.sort(expectedValues);
+    }
+    Iterator<Integer> expected = expectedValues.iterator();
+    int last = Integer.MIN_VALUE;
+    for (int i = 0; i < 15; i++) {
+      assertTrue(t.hasNext());
+      int actual = t.next();
+      if (isSorted()) {
+        assertTrue(actual >= last);
+        last = actual;
+      } else {
+        // any of the first few values should be in the original places
+        if (actual < 15) {
+          assertEquals(i, actual);
+        }
+      }
+
+      assertTrue(actual >= 0 && actual < 100);
+
+      // this is just a regression test, but still of some value
+      assertEquals(expected.next().intValue(), actual);
+      assertFalse(source.hasNext());
+    }
+    assertFalse(t.hasNext());
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/iterator/TestFixedSizeSampler.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/iterator/TestFixedSizeSampler.java b/mr/src/test/java/org/apache/mahout/common/iterator/TestFixedSizeSampler.java
new file mode 100644
index 0000000..470e6d8
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/iterator/TestFixedSizeSampler.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.iterator;
+
+import java.util.Iterator;
+
+public final class TestFixedSizeSampler extends SamplerCase {
+
+  @Override
+  protected Iterator<Integer> createSampler(int n, Iterator<Integer> source) {
+    return new FixedSizeSamplingIterator<Integer>(n, source);
+  }
+
+  @Override
+  protected boolean isSorted() {
+    return false;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java b/mr/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
new file mode 100644
index 0000000..970ea79
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.iterator;
+
+import java.util.Iterator;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Test;
+
+public final class TestSamplingIterator extends MahoutTestCase {
+
+  @Test
+  public void testEmptyCase() {
+    assertFalse(new SamplingIterator<Integer>(new CountingIterator(0), 0.9999).hasNext());
+    assertFalse(new SamplingIterator<Integer>(new CountingIterator(0), 1).hasNext());
+  }
+
+  @Test
+  public void testSmallInput() {
+    Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(1), 0.9999);
+    assertTrue(t.hasNext());
+    assertEquals(0, t.next().intValue());
+    assertFalse(t.hasNext());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testBadRate1() {
+    new SamplingIterator<Integer>(new CountingIterator(1), 0.0);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testBadRate2() {
+    new SamplingIterator<Integer>(new CountingIterator(1), 1.1);
+  }
+
+  @Test
+  public void testExactSizeMatch() {
+    Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(10), 1);
+    for (int i = 0; i < 10; i++) {
+      assertTrue(t.hasNext());
+      assertEquals(i, t.next().intValue());
+    }
+    assertFalse(t.hasNext());
+  }
+
+  @Test
+  public void testSample() {
+    for (int i = 0; i < 1000; i++) {
+      Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(1000), 0.1);
+      int k = 0;
+      while (t.hasNext()) {
+        int v = t.next();
+        k++;
+        assertTrue(v >= 0);
+        assertTrue(v < 1000);
+      }
+      double sd = Math.sqrt(0.9 * 0.1 * 1000);
+      assertTrue(k >= 100 - 4 * sd);
+      assertTrue(k <= 100 + 4 * sd);
+    }
+  }
+}