You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@mahout.apache.org by tc...@apache.org on 2012/03/12 19:25:46 UTC

svn commit: r1299770 - in /mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/common/ core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/ core/src/test/java/org/apache/mahout/clustering/canopy/ core/src/test/java/org/apache/m...

Author: tcp
Date: Mon Mar 12 18:25:45 2012
New Revision: 1299770

URL: http://svn.apache.org/viewvc?rev=1299770&view=rev
Log:
MAHOUT-822: Make Mahout compatible with Hadoop 0.23.1.

Modified:
    mahout/trunk/core/pom.xml
    mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/pom.xml

Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Mon Mar 12 18:25:45 2012
@@ -140,10 +140,6 @@
 
     <!-- Third Party -->
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-core</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-core-asl</artifactId>
     </dependency>
@@ -211,4 +207,43 @@
     </dependency>
 
   </dependencies>
+  
+  <profiles>
+    <profile>
+      <id>hadoop-0.20</id>
+      <activation>
+        <property>
+          <name>!hadoop.version</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>hadoop-0.23</id>
+      <activation>
+        <property>
+          <name>hadoop.version</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-common</artifactId>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-core</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
 </project>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Mon Mar 12 18:25:45 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.common;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
@@ -229,9 +230,9 @@ public final class HadoopUtil {
     FileStatus[] statuses;
     FileSystem fs = path.getFileSystem(conf);
     if (filter == null) {
-      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : fs.listStatus(path);
+      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
     } else {
-      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : fs.listStatus(path, filter);
+      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
     }
     if (ordering != null) {
       Arrays.sort(statuses, ordering);
@@ -239,6 +240,22 @@ public final class HadoopUtil {
     return statuses;
   }
 
+  public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
+    try {
+      return fs.listStatus(path);
+    } catch (FileNotFoundException e) {
+      return new FileStatus[0];
+    }
+  }
+
+  public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter) throws IOException {
+    try {
+      return fs.listStatus(path, filter);
+    } catch (FileNotFoundException e) {
+      return new FileStatus[0];
+    }
+  }
+
   public static void cacheFiles(Path fileToCache, Configuration conf) {
     DistributedCache.setCacheFiles(new URI[]{fileToCache.toUri()}, conf);
   }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java Mon Mar 12 18:25:45 2012
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.df.mapreduce.partial;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
-
-/**
- * Special implementation that collects the output of the mappers
- */
-final class MockContext extends Context {
-
-  private final TreeID[] keys;
-  private final MapredOutput[] values;
-  private int index;
-
-  MockContext(Mapper<?,?,?,?> mapper, Configuration conf, TaskAttemptID taskid, int nbTrees)
-    throws IOException, InterruptedException {
-    mapper.super(conf, taskid, null, null, null, null, null);
-
-    keys = new TreeID[nbTrees];
-    values = new MapredOutput[nbTrees];
-  }
-
-  @Override
-  public void write(Object key, Object value) throws IOException {
-    if (index == keys.length) {
-      throw new IOException("Received more output than expected : " + index);
-    }
-
-    keys[index] = ((TreeID) key).clone();
-    values[index] = ((MapredOutput) value).clone();
-
-    index++;
-  }
-
-  /**
-   * @return number of outputs collected
-   */
-  public int nbOutputs() {
-    return index;
-  }
-
-  public TreeID[] getKeys() {
-    return keys;
-  }
-
-  public MapredOutput[] getValues() {
-    return values;
-  }
-}

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java Mon Mar 12 18:25:45 2012
@@ -1,176 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.df.mapreduce.partial;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.mahout.classifier.df.DFUtils;
-import org.apache.mahout.classifier.df.DecisionForest;
-import org.apache.mahout.classifier.df.builder.TreeBuilder;
-import org.apache.mahout.classifier.df.data.Dataset;
-import org.apache.mahout.classifier.df.mapreduce.Builder;
-import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
-import org.apache.mahout.classifier.df.node.Node;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-
-/**
- * Simulates the Partial mapreduce implementation in a sequential manner. Must
- * receive a seed
- */
-public class PartialSequentialBuilder extends PartialBuilder {
-
-  private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
-
-  private MockContext firstOutput;
-
-  private final Dataset dataset;
-
-  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
-      Dataset dataset, long seed, Configuration conf) {
-    super(treeBuilder, dataPath, new Path("notUsed"), seed, conf);
-    this.dataset = dataset;
-  }
-
-  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
-      Dataset dataset, long seed) {
-    this(treeBuilder, dataPath, dataset, seed, new Configuration());
-  }
-
-  @Override
-  protected void configureJob(Job job)
-      throws IOException {
-    Configuration conf = job.getConfiguration();
-    
-    int num = conf.getInt("mapred.map.tasks", -1);
-
-    super.configureJob(job);
-
-    // PartialBuilder sets the number of maps to 1 if we are running in 'local'
-    conf.setInt("mapred.map.tasks", num);
-  }
-
-  @Override
-  protected boolean runJob(Job job) throws IOException, InterruptedException {
-    Configuration conf = job.getConfiguration();
-    
-    // retrieve the splits
-    TextInputFormat input = new TextInputFormat();
-    List<InputSplit> splits = input.getSplits(job);
-    
-    int nbSplits = splits.size();
-    log.debug("Nb splits : {}", nbSplits);
-
-    InputSplit[] sorted = new InputSplit[nbSplits];
-    splits.toArray(sorted);
-    Builder.sortSplits(sorted);
-
-    int numTrees = Builder.getNbTrees(conf); // total number of trees
-
-    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
-
-    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);
-
-    /* first instance id in hadoop's order */
-    //int[] firstIds = new int[nbSplits];
-    /* partitions' sizes in hadoop order */
-    int[] sizes = new int[nbSplits];
-    
-    // to compute firstIds, process the splits in file order
-    long slowest = 0; // duration of slowest map
-    int firstId = 0;
-    for (InputSplit split : splits) {
-      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
-
-      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
-      reader.initialize(split, task);
-
-      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
-                                               hp, nbSplits, numTrees);
-
-      long time = System.currentTimeMillis();
-
-      //firstIds[hp] = firstId;
-
-      while (reader.nextKeyValue()) {
-        mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
-        firstId++;
-        sizes[hp]++;
-      }
-
-      mapper.cleanup(firstOutput);
-
-      time = System.currentTimeMillis() - time;
-      log.info("Duration : {}", DFUtils.elapsedTime(time));
-
-      if (time > slowest) {
-        slowest = time;
-      }
-    }
-
-    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
-    return true;
-  }
-
-  @Override
-  protected DecisionForest parseOutput(Job job) throws IOException {
-    return processOutput(firstOutput.getKeys(), firstOutput.getValues());
-  }
-
-  /**
-   * extract the decision forest
-   */
-  protected static DecisionForest processOutput(TreeID[] keys, MapredOutput[] values) {
-    List<Node> trees = Lists.newArrayList();
-
-    for (int index = 0; index < keys.length; index++) {
-      MapredOutput value = values[index];
-      trees.add(value.getTree());
-    }
-    
-    return new DecisionForest(trees);
-  }
-
-  /**
-   * Special Step1Mapper that can be configured without using a Configuration
-   * 
-   */
-  private static class MockStep1Mapper extends Step1Mapper {
-    protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
-        int partition, int numMapTasks, int numTrees) {
-      configure(false, treeBuilder, dataset);
-      configure(seed, partition, numMapTasks, numTrees);
-    }
-
-  }
-
-}

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java Mon Mar 12 18:25:45 2012
@@ -17,21 +17,30 @@
 
 package org.apache.mahout.classifier.df.mapreduce.partial;
 
+import static org.easymock.EasyMock.anyObject;
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expectLastCall;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
+
 import java.util.Random;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.mahout.common.MahoutTestCase;
+import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.classifier.df.builder.TreeBuilder;
 import org.apache.mahout.classifier.df.data.Data;
 import org.apache.mahout.classifier.df.data.DataLoader;
 import org.apache.mahout.classifier.df.data.Dataset;
 import org.apache.mahout.classifier.df.data.Utils;
+import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
 import org.apache.mahout.classifier.df.node.Leaf;
 import org.apache.mahout.classifier.df.node.Node;
+import org.apache.mahout.common.MahoutTestCase;
+import org.easymock.Capture;
+import org.easymock.CaptureType;
 import org.junit.Test;
 
 public final class Step1MapperTest extends MahoutTestCase {
@@ -71,6 +80,17 @@ public final class Step1MapperTest exten
     }
   }
 
+  private static class TreeIDCapture extends Capture<TreeID> {
+
+    public TreeIDCapture() {
+      super(CaptureType.ALL);
+    }
+
+    public void setValue(final TreeID value) {
+      super.setValue(value.clone());
+    }
+  }
+
   /** nb attributes per generated data instance */
   static final int NUM_ATTRIBUTES = 4;
 
@@ -83,6 +103,7 @@ public final class Step1MapperTest exten
   /** nb mappers to use */
   static final int NUM_MAPPERS = 2;
 
+  @SuppressWarnings({ "rawtypes", "unchecked" })
   @Test
   public void testMapper() throws Exception {
     Long seed = null;
@@ -109,8 +130,13 @@ public final class Step1MapperTest exten
       // expected number of trees that this mapper will build
       int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);
 
-      MockContext context = new MockContext(new Step1Mapper(),
-          new Configuration(), new TaskAttemptID(), mapNbTrees);
+      Mapper.Context context =
+        createMock(Mapper.Context.class);
+      Capture<TreeID> capturedKeys = new TreeIDCapture();
+      context.write(capture(capturedKeys), anyObject());
+      expectLastCall().anyTimes();
+
+      replay(context);
 
       MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed,
           partition, NUM_MAPPERS, NUM_TREES);
@@ -125,12 +151,13 @@ public final class Step1MapperTest exten
       }
 
       mapper.cleanup(context);
+      verify(context);
 
       // make sure the mapper built all its trees
-      assertEquals(mapNbTrees, context.nbOutputs());
+      assertEquals(mapNbTrees, capturedKeys.getValues().size());
 
       // check the returned keys
-      for (TreeID k : context.getKeys()) {
+      for (TreeID k : capturedKeys.getValues()) {
         assertEquals(partition, k.partition());
         assertEquals(treeIndex, k.treeId());
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Mar 12 18:25:45 2012
@@ -34,6 +34,7 @@ import org.apache.mahout.clustering.Clus
 import org.apache.mahout.common.DummyRecordWriter;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -126,8 +127,8 @@ public final class TestCanopyCreation ex
       int[] expectedNumPoints = { 4, 4, 3 };
       double[][] expectedCentroids = { { 1.5, 1.5 }, { 4.0, 4.0 },
           { 4.666666666666667, 4.6666666666666667 } };
-      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
-          testCanopy.getNumObservations());
+      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
+                   expectedNumPoints[canopyIx]);
       double[] refCentroid = expectedCentroids[canopyIx];
       Vector testCentroid = testCanopy.computeCentroid();
       for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
@@ -151,8 +152,8 @@ public final class TestCanopyCreation ex
         { 4.666666666666667, 4.666666666666667 } };
     for (int canopyIx = 0; canopyIx < referenceEuclidean.size(); canopyIx++) {
       Canopy testCanopy = referenceEuclidean.get(canopyIx);
-      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
-          testCanopy.getNumObservations());
+      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
+                   expectedNumPoints[canopyIx]);
       double[] refCentroid = expectedCentroids[canopyIx];
       Vector testCentroid = testCanopy.computeCentroid();
       for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
@@ -328,20 +329,36 @@ public final class TestCanopyCreation ex
       Canopy canopy = new Canopy();
       assertTrue("more to come", reader.next(key, canopy));
       assertEquals("1st key", "C-0", key.toString());
-      assertEquals("1st x value", 1.5, canopy.getCenter().get(0), EPSILON);
-      assertEquals("1st y value", 1.5, canopy.getCenter().get(1), EPSILON);
+
+      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
+      refCenters.add(new Pair<Double,Double>(1.5,1.5));
+      refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
+      Pair<Double,Double> c = new Pair<Double,Double>(canopy.getCenter().get(0),
+                                                      canopy.getCenter().get(1));
+      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
       assertTrue("more to come", reader.next(key, canopy));
       assertEquals("2nd key", "C-1", key.toString());
-      assertEquals("2nd x value", 4.333333333333334, canopy.getCenter().get(0),
-          EPSILON);
-      assertEquals("2nd y value", 4.333333333333334, canopy.getCenter().get(1),
-          EPSILON);
+      c = new Pair<Double,Double>(canopy.getCenter().get(0),
+                                  canopy.getCenter().get(1));
+      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
       assertFalse("more to come", reader.next(key, canopy));
     } finally {
       Closeables.closeQuietly(reader);
     }
   }
 
+  boolean findAndRemove(Pair<Double,Double> target, 
+                        List<Pair<Double,Double>> list, double epsilon) {
+    for (Pair<Double,Double> curr : list) {
+      if ( (Math.abs(target.getFirst() - curr.getFirst()) < epsilon) 
+           && (Math.abs(target.getSecond() - curr.getSecond()) < epsilon) ) {
+        list.remove(curr);
+        return true;
+      } 
+    }
+    return false;
+  }
+
   /**
    * Story: User can produce final canopy centers using a Hadoop map/reduce job
    * and a EuclideanDistanceMeasure.
@@ -368,14 +385,18 @@ public final class TestCanopyCreation ex
       Canopy value = new Canopy();
       assertTrue("more to come", reader.next(key, value));
       assertEquals("1st key", "C-0", key.toString());
-      assertEquals("1st x value", 1.8, value.getCenter().get(0), EPSILON);
-      assertEquals("1st y value", 1.8, value.getCenter().get(1), EPSILON);
+
+      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
+      refCenters.add(new Pair<Double,Double>(1.8,1.8));
+      refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
+      Pair<Double,Double> c = new Pair<Double,Double>(value.getCenter().get(0),
+                                                      value.getCenter().get(1));
+      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
       assertTrue("more to come", reader.next(key, value));
       assertEquals("2nd key", "C-1", key.toString());
-      assertEquals("2nd x value", 4.433333333333334, value.getCenter().get(0),
-          EPSILON);
-      assertEquals("2nd y value", 4.433333333333334, value.getCenter().get(1),
-          EPSILON);
+      c = new Pair<Double,Double>(value.getCenter().get(0),
+                                  value.getCenter().get(1));
+      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
       assertFalse("more to come", reader.next(key, value));
     } finally {
       Closeables.closeQuietly(reader);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java Mon Mar 12 18:25:45 2012
@@ -20,6 +20,9 @@ package org.apache.mahout.clustering.cla
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
 
 import junit.framework.Assert;
 
@@ -195,9 +198,7 @@ public class ClusterClassificationDriver
   }
   
   private void assertVectorsWithOutlierRemoval() {
-    assertFirstClusterWithOutlierRemoval();
-    assertSecondClusterWithOutlierRemoval();
-    assertThirdClusterWithOutlierRemoval();
+    checkClustersWithOutlierRemoval();
   }
   
   private void assertVectorsWithoutOutlierRemoval() {
@@ -230,25 +231,33 @@ public class ClusterClassificationDriver
           "{1:1.0,0:2.0}", "{1:2.0,0:1.0}"}, vector.asFormatString()));
     }
   }
-  
-  private void assertThirdClusterWithOutlierRemoval() {
-    Assert.assertEquals(1, thirdCluster.size());
-    for (Vector vector : thirdCluster) {
-      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:9.0,0:9.0}"},
-          vector.asFormatString()));
-    }
-  }
-  
-  private void assertSecondClusterWithOutlierRemoval() {
-    Assert.assertEquals(0, secondCluster.size());
-  }
-  
-  private void assertFirstClusterWithOutlierRemoval() {
-    Assert.assertEquals(1, firstCluster.size());
-    for (Vector vector : firstCluster) {
-      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:1.0,0:1.0}"},
-          vector.asFormatString()));
-    }
+
+  private void checkClustersWithOutlierRemoval() {
+    Set<String> reference = Sets.newHashSet(new String[] {"{1:9.0,0:9.0}",
+                                                          "{1:1.0,0:1.0}"});
+    int singletonCnt = 0;
+    int emptyCnt = 0;
+
+    List<List<Vector>> clusters = Lists.newArrayList();
+    clusters.add(firstCluster);
+    clusters.add(secondCluster);
+    clusters.add(thirdCluster);
+
+    for (List<Vector> vList : clusters) {
+      if (vList.size() == 0) {
+        emptyCnt++;
+      } else {
+        singletonCnt++;
+        Assert.assertTrue("expecting only singleton clusters; got size=" + vList.size(),
+                          vList.size() == 1);
+        Assert.assertTrue("not expecting cluster:" + vList.get(0).asFormatString(),
+                          reference.contains(vList.get(0).asFormatString()));
+        reference.remove(vList.get(0).asFormatString());
+      }
+    } 
+    Assert.assertEquals("Different number of empty clusters than expected!", 1, emptyCnt);
+    Assert.assertEquals("Different number of singletons than expected!", 2, singletonCnt);
+    Assert.assertEquals("Didn't match all reference clusters!", 0, reference.size());
   }
-  
+
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Mon Mar 12 18:25:45 2012
@@ -26,6 +26,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -38,6 +39,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.ClusterObservations;
 import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.clustering.canopy.Canopy;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.classify.WeightedVectorWritable;
 import org.apache.mahout.common.DummyOutputCollector;
@@ -486,6 +488,42 @@ public final class TestKmeansClustering 
     // now run the Canopy job
     CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);
     
+    DummyOutputCollector<Text, Canopy> collector1 =
+        new DummyOutputCollector<Text, Canopy>();
+
+    FileStatus[] outParts = FileSystem.get(conf).globStatus(
+                    new Path(outputPath, "clusters-0-final/*-0*"));
+    for (FileStatus outPartStat : outParts) {
+      for (Pair<Text,Canopy> record :
+               new SequenceFileIterable<Text,Canopy>(
+                 outPartStat.getPath(), conf)) {
+          collector1.collect(record.getFirst(), record.getSecond());
+      }
+    }
+
+    boolean got15 = false;
+    boolean got43 = false;
+    int count = 0;
+    for (Text k : collector1.getKeys()) {
+      count++;
+      List<Canopy> vl = collector1.getValue(k);
+      assertEquals("non-singleton centroid!", 1, vl.size());
+      Vector v = vl.get(0).getCenter();
+      assertEquals("cetriod vector is wrong length", 2, v.size());
+      if ( (Math.abs(v.get(0) - 1.5) < EPSILON) 
+                  && (Math.abs(v.get(1) - 1.5) < EPSILON)
+                  && !got15) {
+        got15 = true;
+      } else if ( (Math.abs(v.get(0) - 4.333333333333334) < EPSILON) 
+                  && (Math.abs(v.get(1) - 4.333333333333334) < EPSILON)
+                  && !got43) {
+        got43 = true;
+      } else {
+        assertTrue("got unexpected center: "+v+" ["+v.getClass().toString()+"]", false);
+      }
+    }
+    assertEquals("got unexpected number of centers", 2, count);
+
     // now run the KMeans job
     KMeansDriver.run(pointsPath, new Path(outputPath, "clusters-0-final"), outputPath, new EuclideanDistanceMeasure(),
         0.001, 10, true, false);
@@ -500,7 +538,28 @@ public final class TestKmeansClustering 
       collector.collect(record.getFirst(), record.getSecond());
     }
     
-    assertEquals("num points[0]", 4, collector.getValue(new IntWritable(0)).size());
-    assertEquals("num points[1]", 5, collector.getValue(new IntWritable(1)).size());
+    boolean gotLowClust = false;  // clusters should be [1, *] and [2, *] 
+    boolean gotHighClust = false; // vs [3 , *],  [4 , *] and [5, *] 
+    for (IntWritable k : collector.getKeys()) {
+      List<WeightedVectorWritable> wvList = collector.getValue(k);
+      assertTrue("empty cluster!", wvList.size() != 0);
+      if (wvList.get(0).getVector().get(0) <= 2.0) {
+        for (WeightedVectorWritable wv : wvList) {
+          Vector v = wv.getVector();
+          int idx = v.maxValueIndex();
+          assertTrue("bad cluster!", v.get(idx) <= 2.0);
+        }
+        assertEquals("Wrong size cluster", 4, wvList.size());
+        gotLowClust= true;
+      } else {
+        for (WeightedVectorWritable wv : wvList) {
+          Vector v = wv.getVector();
+          int idx = v.minValueIndex();
+          assertTrue("bad cluster!", v.get(idx) > 2.0);
+        }
+        assertEquals("Wrong size cluster", 5, wvList.size());
+        gotHighClust= true;
+      }
+    }
   }
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Mon Mar 12 18:25:45 2012
@@ -21,10 +21,12 @@ import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
@@ -350,7 +352,13 @@ public final class TestMeanShift extends
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
     Collection<VectorWritable> points = Lists.newArrayList();
-    for (Vector v : raw) {
+    Random r = new Random(123);
+    Vector[] permutedRaw = new Vector[raw.length];
+    for (int i = 0; i < raw.length; i++)
+      permutedRaw = raw;
+    for (int i = 0; i < permutedRaw.length; i++)
+      permutedRaw[i] = permutedRaw[i + r.nextInt(raw.length - i)];
+    for (Vector v : permutedRaw) {
       points.add(new VectorWritable(v));
     }
     ClusteringTestUtils.writePointsToFile(points,
@@ -376,10 +384,12 @@ public final class TestMeanShift extends
         optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
         optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
     ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
-    Path outPart = new Path(output, "clusters-4-final/part-r-00000");
-    long count = HadoopUtil.countRecords(outPart, conf);
-    assertEquals("count", 3, count);
-    outPart = new Path(output, "clusters-0/part-m-00000");
+    FileStatus[] outParts = FileSystem.get(conf).globStatus(
+        new Path(output, "clusters-?-final/part-r-*"));
+    assertEquals("Wrong number of matching final parts", 1, outParts.length);
+    long count = HadoopUtil.countRecords(outParts[0].getPath(), conf);
+    assertEquals("count", 5, count);
+    Path outPart = new Path(output, "clusters-0/part-m-00000");
     Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
         true, conf);
     // now test the initial clusters to ensure the type of their centers has

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java Mon Mar 12 18:25:45 2012
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.mahout.common;
-
-import org.apache.hadoop.mapreduce.Counter;
-
-final class DummyCounter extends Counter {
-
-}

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Mar 12 18:25:45 2012
@@ -17,16 +17,21 @@
 
 package org.apache.mahout.common;
 
+import com.google.common.collect.Lists;
+
 import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.MapContext;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.ReduceContext;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -65,7 +70,18 @@ public final class DummyRecordWriter<K, 
                                                                       Configuration configuration,
                                                                       RecordWriter<K2, V2> output)
     throws IOException, InterruptedException {
-    return mapper.new Context(configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+
+    // Use reflection since the context types changed incompatibly between 0.20
+    // and 0.23.
+    try {
+      return buildNewMapperContext(configuration, output);
+    } catch (Exception e) {
+      try {
+        return buildOldMapperContext(mapper, configuration, output);
+      } catch (Exception ex) {
+        throw new IllegalStateException(ex);
+      }
+    }
   }
 
   public static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2> reducer,
@@ -74,17 +90,96 @@ public final class DummyRecordWriter<K, 
                                                                        Class<K1> keyClass,
                                                                        Class<V1> valueClass)
     throws IOException, InterruptedException {
-    return reducer.new Context(configuration,
-                               new TaskAttemptID(),
-                               new MockIterator(),
-                               null,
-                               null,
-                               output,
-                               null,
-                               new DummyStatusReporter(),
-                               null,
-                               keyClass,
-                               valueClass);
+
+    // Use reflection since the context types changed incompatibly between 0.20
+    // and 0.23.
+    try {
+      return buildNewReducerContext(configuration, output, keyClass, valueClass);
+    } catch (Exception e) {
+      try {
+        return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
+      } catch (Exception ex) {
+        throw new IllegalStateException(ex);
+      }
+    }
+  }
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
+      Configuration configuration, RecordWriter<K2, V2> output) throws Exception {
+    Class<?> mapContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl");
+    Constructor<?> cons = mapContextImplClass.getConstructors()[0];
+    Object mapContextImpl = cons.newInstance(configuration,
+        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+
+    Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+    Object wrappedMapper = wrappedMapperClass.newInstance();
+    Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+    return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
+  }
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
+      Mapper<K1, V1, K2, V2> mapper, Configuration configuration,
+      RecordWriter<K2, V2> output) throws Exception {
+    Constructor<?> cons = getNestedContextConstructor(mapper.getClass());
+    // first argument to the constructor is the enclosing instance
+    return (Mapper.Context) cons.newInstance(mapper, configuration,
+        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+  }
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
+      Configuration configuration, RecordWriter<K2, V2> output, Class<K1> keyClass,
+      Class<V1> valueClass) throws Exception {
+    Class<?> reduceContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl");
+    Constructor<?> cons = reduceContextImplClass.getConstructors()[0];
+    Object reduceContextImpl = cons.newInstance(configuration,
+      new TaskAttemptID(),
+      new MockIterator(),
+      null,
+      null,
+      output,
+      null,
+      new DummyStatusReporter(),
+      null,
+      keyClass,
+      valueClass);
+
+    Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
+    Object wrappedReducer = wrappedReducerClass.newInstance();
+    Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
+    return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
+  }
+  
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
+      Reducer<K1, V1, K2, V2> reducer, Configuration configuration,
+      RecordWriter<K2, V2> output, Class<K1> keyClass,
+      Class<V1> valueClass) throws Exception {
+    Constructor<?> cons = getNestedContextConstructor(reducer.getClass());
+    // first argument to the constructor is the enclosing instance
+    return (Reducer.Context) cons.newInstance(reducer,
+        configuration,
+        new TaskAttemptID(),
+        new MockIterator(),
+        null,
+        null,
+        output,
+        null,
+        new DummyStatusReporter(),
+        null,
+        keyClass,
+        valueClass);
+  }
+
+  private static Constructor<?> getNestedContextConstructor(Class<?> outerClass) {
+    for (Class<?> nestedClass : outerClass.getClasses()) {
+      if ("Context".equals(nestedClass.getSimpleName())) {
+        return nestedClass.getConstructors()[0];
+      }
+    }
+    throw new IllegalStateException("Cannot find context class for " + outerClass);
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Mon Mar 12 18:25:45 2012
@@ -19,6 +19,8 @@
 
 package org.apache.mahout.common;
 
+import static org.easymock.EasyMock.createMockBuilder;
+
 import java.util.Map;
 
 import com.google.common.collect.Maps;
@@ -30,10 +32,21 @@ public final class DummyStatusReporter e
   private final Map<Enum<?>, Counter> counters = Maps.newHashMap();
   private final Map<String, Counter> counterGroups = Maps.newHashMap();
 
+  private Counter newCounter() {
+    try {
+      // 0.23 case
+      String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
+      return (Counter) createMockBuilder(Class.forName(c)).createMock();
+    } catch (ClassNotFoundException e) {
+      // 0.20 case
+      return createMockBuilder(Counter.class).createMock();
+    }
+  }
+
   @Override
   public Counter getCounter(Enum<?> name) {
     if (!counters.containsKey(name)) {
-      counters.put(name, new DummyCounter());
+      counters.put(name, newCounter());
     }
     return counters.get(name);
   }
@@ -42,7 +55,7 @@ public final class DummyStatusReporter e
   @Override
   public Counter getCounter(String group, String name) {
     if (!counterGroups.containsKey(group + name)) {
-      counterGroups.put(group + name, new DummyCounter());
+      counterGroups.put(group + name, newCounter());
     }
     return counterGroups.get(group+name);
   }
@@ -55,4 +68,8 @@ public final class DummyStatusReporter e
   public void setStatus(String status) {
   }
 
+  public float getProgress() {
+    return 0;
+  }
+
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Mon Mar 12 18:25:45 2012
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.math.DenseVector;
@@ -254,14 +255,14 @@ public final class TestDistributedRowMat
 
     deleteContentsOfPath(conf, outputPath);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     Vector result1 = dm.times(v);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     deleteContentsOfPath(conf, outputPath);
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
@@ -291,14 +292,14 @@ public final class TestDistributedRowMat
 
     deleteContentsOfPath(conf, outputPath);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     Vector result1 = dm.timesSquared(v);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     deleteContentsOfPath(conf, outputPath);
-    assertEquals(0, fs.listStatus(outputPath).length);
+    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
 
     conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
@@ -325,7 +326,7 @@ public final class TestDistributedRowMat
   private static void deleteContentsOfPath(Configuration conf, Path path) throws Exception {
     FileSystem fs = path.getFileSystem(conf);
 
-    FileStatus[] statuses = fs.listStatus(path);
+    FileStatus[] statuses = HadoopUtil.listStatus(fs, path);
     for (FileStatus status : statuses) {
       fs.delete(status.getPath(), true);
     }

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Mon Mar 12 18:25:45 2012
@@ -193,7 +193,7 @@ public final class TestClusterDumper ext
         output, measure, 8, 4, true, 0.0, true);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
-        "clusters-0"), new Path(output, "clusteredPoints"));
+        "clusters-0-final"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
   }
   

Modified: mahout/trunk/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Mon Mar 12 18:25:45 2012
@@ -107,6 +107,17 @@
     <url>https://issues.apache.org/jira/browse/MAHOUT</url>
   </issueManagement>
   
+  <repositories>
+    <repository>
+      <id>apache.snapshots</id>
+      <name>Apache Snapshot Repository</name>
+      <url>http://repository.apache.org/snapshots</url>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+    </repository>
+  </repositories>
+  
   <dependencyManagement>
     <dependencies>
     
@@ -264,6 +275,100 @@
         </exclusions>
       </dependency>
       <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>net.sf.kosmosfs</groupId>
+            <artifactId>kfs</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>hsqldb</groupId>
+            <artifactId>hsqldb</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-el</groupId>
+            <artifactId>commons-el</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>oro</groupId>
+            <artifactId>oro</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jsp-2.1</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jsp-api-2.1</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api-2.5</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-net</groupId>
+            <artifactId>commons-net</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>tomcat</groupId>
+            <artifactId>jasper-runtime</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>tomcat</groupId>
+            <artifactId>jasper-compiler</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>xmlenc</groupId>
+            <artifactId>xmlenc</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>net.java.dev.jets3t</groupId>
+            <artifactId>jets3t</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.eclipse.jdt</groupId>
+            <artifactId>core</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-jcl</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-core</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-common</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+
+      <dependency>
         <groupId>org.codehaus.jackson</groupId>
         <artifactId>jackson-core-asl</artifactId>
         <version>1.8.2</version>

Re: svn commit: r1299770 - in /mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/common/ core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/ core/src/test/java/org/apache/mahout/clustering/canopy/ core/src/test/java/org/apache/m...

Posted by Grant Ingersoll <gs...@apache.org>.

Done (tcp@a.o)

On Mar 12, 2012, at 2:30 PM, tom pierce wrote:

> Can someone grant me JIRA privileges so I can close tickets?
> 
> (Or, if that isn't something all committers get, could someone please mark MAHOUT-822 and MAHOUT-980 as closed?)
> 
> -tom
> 
> On 03/12/2012 02:25 PM, tcp@apache.org wrote:
>> Author: tcp
>> Date: Mon Mar 12 18:25:45 2012
>> New Revision: 1299770
>> 
>> URL: http://svn.apache.org/viewvc?rev=1299770&view=rev
>> Log:
>> MAHOUT-822: Make Mahout compatible with Hadoop 0.23.1.
>> 
>> Modified:
>>     mahout/trunk/core/pom.xml
>>     mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
>>     mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
>>     mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
>>     mahout/trunk/pom.xml
>> 
>> Modified: mahout/trunk/core/pom.xml
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/pom.xml (original)
>> +++ mahout/trunk/core/pom.xml Mon Mar 12 18:25:45 2012
>> @@ -140,10 +140,6 @@
>> 
>>      <!-- Third Party -->
>>      <dependency>
>> -<groupId>org.apache.hadoop</groupId>
>> -<artifactId>hadoop-core</artifactId>
>> -</dependency>
>> -<dependency>
>>        <groupId>org.codehaus.jackson</groupId>
>>        <artifactId>jackson-core-asl</artifactId>
>>      </dependency>
>> @@ -211,4 +207,43 @@
>>      </dependency>
>> 
>>    </dependencies>
>> +
>> +<profiles>
>> +<profile>
>> +<id>hadoop-0.20</id>
>> +<activation>
>> +<property>
>> +<name>!hadoop.version</name>
>> +</property>
>> +</activation>
>> +<dependencies>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-core</artifactId>
>> +</dependency>
>> +</dependencies>
>> +</profile>
>> +<profile>
>> +<id>hadoop-0.23</id>
>> +<activation>
>> +<property>
>> +<name>hadoop.version</name>
>> +</property>
>> +</activation>
>> +<dependencies>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-common</artifactId>
>> +</dependency>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-mapreduce-client-common</artifactId>
>> +</dependency>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-mapreduce-client-core</artifactId>
>> +</dependency>
>> +</dependencies>
>> +</profile>
>> +</profiles>
>>  </project>
>> 
>> Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
>> +++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Mon Mar 12 18:25:45 2012
>> @@ -17,6 +17,7 @@
>> 
>>  package org.apache.mahout.common;
>> 
>> +import java.io.FileNotFoundException;
>>  import java.io.IOException;
>>  import java.io.InputStream;
>>  import java.net.URI;
>> @@ -229,9 +230,9 @@ public final class HadoopUtil {
>>      FileStatus[] statuses;
>>      FileSystem fs = path.getFileSystem(conf);
>>      if (filter == null) {
>> -      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : fs.listStatus(path);
>> +      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
>>      } else {
>> -      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : fs.listStatus(path, filter);
>> +      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
>>      }
>>      if (ordering != null) {
>>        Arrays.sort(statuses, ordering);
>> @@ -239,6 +240,22 @@ public final class HadoopUtil {
>>      return statuses;
>>    }
>> 
>> +  public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
>> +    try {
>> +      return fs.listStatus(path);
>> +    } catch (FileNotFoundException e) {
>> +      return new FileStatus[0];
>> +    }
>> +  }
>> +
>> +  public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter) throws IOException {
>> +    try {
>> +      return fs.listStatus(path, filter);
>> +    } catch (FileNotFoundException e) {
>> +      return new FileStatus[0];
>> +    }
>> +  }
>> +
>>    public static void cacheFiles(Path fileToCache, Configuration conf) {
>>      DistributedCache.setCacheFiles(new URI[]{fileToCache.toUri()}, conf);
>>    }
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java Mon Mar 12 18:25:45 2012
>> @@ -1,70 +0,0 @@
>> -/**
>> - * Licensed to the Apache Software Foundation (ASF) under one or more
>> - * contributor license agreements.  See the NOTICE file distributed with
>> - * this work for additional information regarding copyright ownership.
>> - * The ASF licenses this file to You under the Apache License, Version 2.0
>> - * (the "License"); you may not use this file except in compliance with
>> - * the License.  You may obtain a copy of the License at
>> - *
>> - *     http://www.apache.org/licenses/LICENSE-2.0
>> - *
>> - * Unless required by applicable law or agreed to in writing, software
>> - * distributed under the License is distributed on an "AS IS" BASIS,
>> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> - * See the License for the specific language governing permissions and
>> - * limitations under the License.
>> - */
>> -package org.apache.mahout.classifier.df.mapreduce.partial;
>> -
>> -import java.io.IOException;
>> -
>> -import org.apache.hadoop.conf.Configuration;
>> -import org.apache.hadoop.mapreduce.Mapper;
>> -import org.apache.hadoop.mapreduce.TaskAttemptID;
>> -import org.apache.hadoop.mapreduce.Mapper.Context;
>> -import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
>> -
>> -/**
>> - * Special implementation that collects the output of the mappers
>> - */
>> -final class MockContext extends Context {
>> -
>> -  private final TreeID[] keys;
>> -  private final MapredOutput[] values;
>> -  private int index;
>> -
>> -  MockContext(Mapper<?,?,?,?>  mapper, Configuration conf, TaskAttemptID taskid, int nbTrees)
>> -    throws IOException, InterruptedException {
>> -    mapper.super(conf, taskid, null, null, null, null, null);
>> -
>> -    keys = new TreeID[nbTrees];
>> -    values = new MapredOutput[nbTrees];
>> -  }
>> -
>> -  @Override
>> -  public void write(Object key, Object value) throws IOException {
>> -    if (index == keys.length) {
>> -      throw new IOException("Received more output than expected : " + index);
>> -    }
>> -
>> -    keys[index] = ((TreeID) key).clone();
>> -    values[index] = ((MapredOutput) value).clone();
>> -
>> -    index++;
>> -  }
>> -
>> -  /**
>> -   * @return number of outputs collected
>> -   */
>> -  public int nbOutputs() {
>> -    return index;
>> -  }
>> -
>> -  public TreeID[] getKeys() {
>> -    return keys;
>> -  }
>> -
>> -  public MapredOutput[] getValues() {
>> -    return values;
>> -  }
>> -}
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java Mon Mar 12 18:25:45 2012
>> @@ -1,176 +0,0 @@
>> -/**
>> - * Licensed to the Apache Software Foundation (ASF) under one or more
>> - * contributor license agreements.  See the NOTICE file distributed with
>> - * this work for additional information regarding copyright ownership.
>> - * The ASF licenses this file to You under the Apache License, Version 2.0
>> - * (the "License"); you may not use this file except in compliance with
>> - * the License.  You may obtain a copy of the License at
>> - *
>> - *     http://www.apache.org/licenses/LICENSE-2.0
>> - *
>> - * Unless required by applicable law or agreed to in writing, software
>> - * distributed under the License is distributed on an "AS IS" BASIS,
>> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> - * See the License for the specific language governing permissions and
>> - * limitations under the License.
>> - */
>> -
>> -package org.apache.mahout.classifier.df.mapreduce.partial;
>> -
>> -import java.io.IOException;
>> -import java.util.List;
>> -
>> -import org.apache.commons.lang.ArrayUtils;
>> -import org.apache.hadoop.conf.Configuration;
>> -import org.apache.hadoop.fs.Path;
>> -import org.apache.hadoop.io.LongWritable;
>> -import org.apache.hadoop.io.Text;
>> -import org.apache.hadoop.mapreduce.InputSplit;
>> -import org.apache.hadoop.mapreduce.Job;
>> -import org.apache.hadoop.mapreduce.RecordReader;
>> -import org.apache.hadoop.mapreduce.TaskAttemptContext;
>> -import org.apache.hadoop.mapreduce.TaskAttemptID;
>> -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
>> -import org.apache.mahout.classifier.df.DFUtils;
>> -import org.apache.mahout.classifier.df.DecisionForest;
>> -import org.apache.mahout.classifier.df.builder.TreeBuilder;
>> -import org.apache.mahout.classifier.df.data.Dataset;
>> -import org.apache.mahout.classifier.df.mapreduce.Builder;
>> -import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
>> -import org.apache.mahout.classifier.df.node.Node;
>> -import org.slf4j.Logger;
>> -import org.slf4j.LoggerFactory;
>> -
>> -import com.google.common.collect.Lists;
>> -
>> -/**
>> - * Simulates the Partial mapreduce implementation in a sequential manner. Must
>> - * receive a seed
>> - */
>> -public class PartialSequentialBuilder extends PartialBuilder {
>> -
>> -  private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
>> -
>> -  private MockContext firstOutput;
>> -
>> -  private final Dataset dataset;
>> -
>> -  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
>> -      Dataset dataset, long seed, Configuration conf) {
>> -    super(treeBuilder, dataPath, new Path("notUsed"), seed, conf);
>> -    this.dataset = dataset;
>> -  }
>> -
>> -  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
>> -      Dataset dataset, long seed) {
>> -    this(treeBuilder, dataPath, dataset, seed, new Configuration());
>> -  }
>> -
>> -  @Override
>> -  protected void configureJob(Job job)
>> -      throws IOException {
>> -    Configuration conf = job.getConfiguration();
>> -
>> -    int num = conf.getInt("mapred.map.tasks", -1);
>> -
>> -    super.configureJob(job);
>> -
>> -    // PartialBuilder sets the number of maps to 1 if we are running in 'local'
>> -    conf.setInt("mapred.map.tasks", num);
>> -  }
>> -
>> -  @Override
>> -  protected boolean runJob(Job job) throws IOException, InterruptedException {
>> -    Configuration conf = job.getConfiguration();
>> -
>> -    // retrieve the splits
>> -    TextInputFormat input = new TextInputFormat();
>> -    List<InputSplit>  splits = input.getSplits(job);
>> -
>> -    int nbSplits = splits.size();
>> -    log.debug("Nb splits : {}", nbSplits);
>> -
>> -    InputSplit[] sorted = new InputSplit[nbSplits];
>> -    splits.toArray(sorted);
>> -    Builder.sortSplits(sorted);
>> -
>> -    int numTrees = Builder.getNbTrees(conf); // total number of trees
>> -
>> -    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
>> -
>> -    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);
>> -
>> -    /* first instance id in hadoop's order */
>> -    //int[] firstIds = new int[nbSplits];
>> -    /* partitions' sizes in hadoop order */
>> -    int[] sizes = new int[nbSplits];
>> -
>> -    // to compute firstIds, process the splits in file order
>> -    long slowest = 0; // duration of slowest map
>> -    int firstId = 0;
>> -    for (InputSplit split : splits) {
>> -      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
>> -
>> -      RecordReader<LongWritable, Text>  reader = input.createRecordReader(split, task);
>> -      reader.initialize(split, task);
>> -
>> -      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
>> -                                               hp, nbSplits, numTrees);
>> -
>> -      long time = System.currentTimeMillis();
>> -
>> -      //firstIds[hp] = firstId;
>> -
>> -      while (reader.nextKeyValue()) {
>> -        mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
>> -        firstId++;
>> -        sizes[hp]++;
>> -      }
>> -
>> -      mapper.cleanup(firstOutput);
>> -
>> -      time = System.currentTimeMillis() - time;
>> -      log.info("Duration : {}", DFUtils.elapsedTime(time));
>> -
>> -      if (time>  slowest) {
>> -        slowest = time;
>> -      }
>> -    }
>> -
>> -    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
>> -    return true;
>> -  }
>> -
>> -  @Override
>> -  protected DecisionForest parseOutput(Job job) throws IOException {
>> -    return processOutput(firstOutput.getKeys(), firstOutput.getValues());
>> -  }
>> -
>> -  /**
>> -   * extract the decision forest
>> -   */
>> -  protected static DecisionForest processOutput(TreeID[] keys, MapredOutput[] values) {
>> -    List<Node>  trees = Lists.newArrayList();
>> -
>> -    for (int index = 0; index<  keys.length; index++) {
>> -      MapredOutput value = values[index];
>> -      trees.add(value.getTree());
>> -    }
>> -
>> -    return new DecisionForest(trees);
>> -  }
>> -
>> -  /**
>> -   * Special Step1Mapper that can be configured without using a Configuration
>> -   *
>> -   */
>> -  private static class MockStep1Mapper extends Step1Mapper {
>> -    protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
>> -        int partition, int numMapTasks, int numTrees) {
>> -      configure(false, treeBuilder, dataset);
>> -      configure(seed, partition, numMapTasks, numTrees);
>> -    }
>> -
>> -  }
>> -
>> -}
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java Mon Mar 12 18:25:45 2012
>> @@ -17,21 +17,30 @@
>> 
>>  package org.apache.mahout.classifier.df.mapreduce.partial;
>> 
>> +import static org.easymock.EasyMock.anyObject;
>> +import static org.easymock.EasyMock.capture;
>> +import static org.easymock.EasyMock.createMock;
>> +import static org.easymock.EasyMock.expectLastCall;
>> +import static org.easymock.EasyMock.replay;
>> +import static org.easymock.EasyMock.verify;
>> +
>>  import java.util.Random;
>> 
>> -import org.apache.hadoop.conf.Configuration;
>>  import org.apache.hadoop.io.LongWritable;
>>  import org.apache.hadoop.io.Text;
>> -import org.apache.hadoop.mapreduce.TaskAttemptID;
>> -import org.apache.mahout.common.MahoutTestCase;
>> +import org.apache.hadoop.mapreduce.Mapper;
>>  import org.apache.mahout.common.RandomUtils;
>>  import org.apache.mahout.classifier.df.builder.TreeBuilder;
>>  import org.apache.mahout.classifier.df.data.Data;
>>  import org.apache.mahout.classifier.df.data.DataLoader;
>>  import org.apache.mahout.classifier.df.data.Dataset;
>>  import org.apache.mahout.classifier.df.data.Utils;
>> +import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
>>  import org.apache.mahout.classifier.df.node.Leaf;
>>  import org.apache.mahout.classifier.df.node.Node;
>> +import org.apache.mahout.common.MahoutTestCase;
>> +import org.easymock.Capture;
>> +import org.easymock.CaptureType;
>>  import org.junit.Test;
>> 
>>  public final class Step1MapperTest extends MahoutTestCase {
>> @@ -71,6 +80,17 @@ public final class Step1MapperTest exten
>>      }
>>    }
>> 
>> +  private static class TreeIDCapture extends Capture<TreeID>  {
>> +
>> +    public TreeIDCapture() {
>> +      super(CaptureType.ALL);
>> +    }
>> +
>> +    public void setValue(final TreeID value) {
>> +      super.setValue(value.clone());
>> +    }
>> +  }
>> +
>>    /** nb attributes per generated data instance */
>>    static final int NUM_ATTRIBUTES = 4;
>> 
>> @@ -83,6 +103,7 @@ public final class Step1MapperTest exten
>>    /** nb mappers to use */
>>    static final int NUM_MAPPERS = 2;
>> 
>> +  @SuppressWarnings({ "rawtypes", "unchecked" })
>>    @Test
>>    public void testMapper() throws Exception {
>>      Long seed = null;
>> @@ -109,8 +130,13 @@ public final class Step1MapperTest exten
>>        // expected number of trees that this mapper will build
>>        int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);
>> 
>> -      MockContext context = new MockContext(new Step1Mapper(),
>> -          new Configuration(), new TaskAttemptID(), mapNbTrees);
>> +      Mapper.Context context =
>> +        createMock(Mapper.Context.class);
>> +      Capture<TreeID>  capturedKeys = new TreeIDCapture();
>> +      context.write(capture(capturedKeys), anyObject());
>> +      expectLastCall().anyTimes();
>> +
>> +      replay(context);
>> 
>>        MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed,
>>            partition, NUM_MAPPERS, NUM_TREES);
>> @@ -125,12 +151,13 @@ public final class Step1MapperTest exten
>>        }
>> 
>>        mapper.cleanup(context);
>> +      verify(context);
>> 
>>        // make sure the mapper built all its trees
>> -      assertEquals(mapNbTrees, context.nbOutputs());
>> +      assertEquals(mapNbTrees, capturedKeys.getValues().size());
>> 
>>        // check the returned keys
>> -      for (TreeID k : context.getKeys()) {
>> +      for (TreeID k : capturedKeys.getValues()) {
>>          assertEquals(partition, k.partition());
>>          assertEquals(treeIndex, k.treeId());
>> 
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Mar 12 18:25:45 2012
>> @@ -34,6 +34,7 @@ import org.apache.mahout.clustering.Clus
>>  import org.apache.mahout.common.DummyRecordWriter;
>>  import org.apache.mahout.common.HadoopUtil;
>>  import org.apache.mahout.common.MahoutTestCase;
>> +import org.apache.mahout.common.Pair;
>>  import org.apache.mahout.common.commandline.DefaultOptionCreator;
>>  import org.apache.mahout.common.distance.DistanceMeasure;
>>  import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
>> @@ -126,8 +127,8 @@ public final class TestCanopyCreation ex
>>        int[] expectedNumPoints = { 4, 4, 3 };
>>        double[][] expectedCentroids = { { 1.5, 1.5 }, { 4.0, 4.0 },
>>            { 4.666666666666667, 4.6666666666666667 } };
>> -      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
>> -          testCanopy.getNumObservations());
>> +      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
>> +                   expectedNumPoints[canopyIx]);
>>        double[] refCentroid = expectedCentroids[canopyIx];
>>        Vector testCentroid = testCanopy.computeCentroid();
>>        for (int pointIx = 0; pointIx<  refCentroid.length; pointIx++) {
>> @@ -151,8 +152,8 @@ public final class TestCanopyCreation ex
>>          { 4.666666666666667, 4.666666666666667 } };
>>      for (int canopyIx = 0; canopyIx<  referenceEuclidean.size(); canopyIx++) {
>>        Canopy testCanopy = referenceEuclidean.get(canopyIx);
>> -      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
>> -          testCanopy.getNumObservations());
>> +      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
>> +                   expectedNumPoints[canopyIx]);
>>        double[] refCentroid = expectedCentroids[canopyIx];
>>        Vector testCentroid = testCanopy.computeCentroid();
>>        for (int pointIx = 0; pointIx<  refCentroid.length; pointIx++) {
>> @@ -328,20 +329,36 @@ public final class TestCanopyCreation ex
>>        Canopy canopy = new Canopy();
>>        assertTrue("more to come", reader.next(key, canopy));
>>        assertEquals("1st key", "C-0", key.toString());
>> -      assertEquals("1st x value", 1.5, canopy.getCenter().get(0), EPSILON);
>> -      assertEquals("1st y value", 1.5, canopy.getCenter().get(1), EPSILON);
>> +
>> +      List<Pair<Double,Double>>  refCenters = Lists.newArrayList();
>> +      refCenters.add(new Pair<Double,Double>(1.5,1.5));
>> +      refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
>> +      Pair<Double,Double>  c = new Pair<Double,Double>(canopy.getCenter().get(0),
>> +                                                      canopy.getCenter().get(1));
>> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>>        assertTrue("more to come", reader.next(key, canopy));
>>        assertEquals("2nd key", "C-1", key.toString());
>> -      assertEquals("2nd x value", 4.333333333333334, canopy.getCenter().get(0),
>> -          EPSILON);
>> -      assertEquals("2nd y value", 4.333333333333334, canopy.getCenter().get(1),
>> -          EPSILON);
>> +      c = new Pair<Double,Double>(canopy.getCenter().get(0),
>> +                                  canopy.getCenter().get(1));
>> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>>        assertFalse("more to come", reader.next(key, canopy));
>>      } finally {
>>        Closeables.closeQuietly(reader);
>>      }
>>    }
>> 
>> +  boolean findAndRemove(Pair<Double,Double>  target,
>> +                        List<Pair<Double,Double>>  list, double epsilon) {
>> +    for (Pair<Double,Double>  curr : list) {
>> +      if ( (Math.abs(target.getFirst() - curr.getFirst()) < epsilon)
>> +           && (Math.abs(target.getSecond() - curr.getSecond()) < epsilon) ) {
>> +        list.remove(curr);
>> +        return true;
>> +      }
>> +    }
>> +    return false;
>> +  }
>> +
>>    /**
>>     * Story: User can produce final canopy centers using a Hadoop map/reduce job
>>     * and a EuclideanDistanceMeasure.
>> @@ -368,14 +385,18 @@ public final class TestCanopyCreation ex
>>        Canopy value = new Canopy();
>>        assertTrue("more to come", reader.next(key, value));
>>        assertEquals("1st key", "C-0", key.toString());
>> -      assertEquals("1st x value", 1.8, value.getCenter().get(0), EPSILON);
>> -      assertEquals("1st y value", 1.8, value.getCenter().get(1), EPSILON);
>> +
>> +      List<Pair<Double,Double>>  refCenters = Lists.newArrayList();
>> +      refCenters.add(new Pair<Double,Double>(1.8,1.8));
>> +      refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
>> +      Pair<Double,Double>  c = new Pair<Double,Double>(value.getCenter().get(0),
>> +                                                      value.getCenter().get(1));
>> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>>        assertTrue("more to come", reader.next(key, value));
>>        assertEquals("2nd key", "C-1", key.toString());
>> -      assertEquals("2nd x value", 4.433333333333334, value.getCenter().get(0),
>> -          EPSILON);
>> -      assertEquals("2nd y value", 4.433333333333334, value.getCenter().get(1),
>> -          EPSILON);
>> +      c = new Pair<Double,Double>(value.getCenter().get(0),
>> +                                  value.getCenter().get(1));
>> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>>        assertFalse("more to come", reader.next(key, value));
>>      } finally {
>>        Closeables.closeQuietly(reader);
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java Mon Mar 12 18:25:45 2012
>> @@ -20,6 +20,9 @@ package org.apache.mahout.clustering.cla
>>  import java.io.IOException;
>>  import java.util.ArrayList;
>>  import java.util.List;
>> +import java.util.Set;
>> +
>> +import com.google.common.collect.Sets;
>> 
>>  import junit.framework.Assert;
>> 
>> @@ -195,9 +198,7 @@ public class ClusterClassificationDriver
>>    }
>> 
>>    private void assertVectorsWithOutlierRemoval() {
>> -    assertFirstClusterWithOutlierRemoval();
>> -    assertSecondClusterWithOutlierRemoval();
>> -    assertThirdClusterWithOutlierRemoval();
>> +    checkClustersWithOutlierRemoval();
>>    }
>> 
>>    private void assertVectorsWithoutOutlierRemoval() {
>> @@ -230,25 +231,33 @@ public class ClusterClassificationDriver
>>            "{1:1.0,0:2.0}", "{1:2.0,0:1.0}"}, vector.asFormatString()));
>>      }
>>    }
>> -
>> -  private void assertThirdClusterWithOutlierRemoval() {
>> -    Assert.assertEquals(1, thirdCluster.size());
>> -    for (Vector vector : thirdCluster) {
>> -      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:9.0,0:9.0}"},
>> -          vector.asFormatString()));
>> -    }
>> -  }
>> -
>> -  private void assertSecondClusterWithOutlierRemoval() {
>> -    Assert.assertEquals(0, secondCluster.size());
>> -  }
>> -
>> -  private void assertFirstClusterWithOutlierRemoval() {
>> -    Assert.assertEquals(1, firstCluster.size());
>> -    for (Vector vector : firstCluster) {
>> -      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:1.0,0:1.0}"},
>> -          vector.asFormatString()));
>> -    }
>> +
>> +  private void checkClustersWithOutlierRemoval() {
>> +    Set<String>  reference = Sets.newHashSet(new String[] {"{1:9.0,0:9.0}",
>> +                                                          "{1:1.0,0:1.0}"});
>> +    int singletonCnt = 0;
>> +    int emptyCnt = 0;
>> +
>> +    List<List<Vector>>  clusters = Lists.newArrayList();
>> +    clusters.add(firstCluster);
>> +    clusters.add(secondCluster);
>> +    clusters.add(thirdCluster);
>> +
>> +    for (List<Vector>  vList : clusters) {
>> +      if (vList.size() == 0) {
>> +        emptyCnt++;
>> +      } else {
>> +        singletonCnt++;
>> +        Assert.assertTrue("expecting only singleton clusters; got size=" + vList.size(),
>> +                          vList.size() == 1);
>> +        Assert.assertTrue("not expecting cluster:" + vList.get(0).asFormatString(),
>> +                          reference.contains(vList.get(0).asFormatString()));
>> +        reference.remove(vList.get(0).asFormatString());
>> +      }
>> +    }
>> +    Assert.assertEquals("Different number of empty clusters than expected!", 1, emptyCnt);
>> +    Assert.assertEquals("Different number of singletons than expected!", 2, singletonCnt);
>> +    Assert.assertEquals("Didn't match all reference clusters!", 0, reference.size());
>>    }
>> -
>> +
>>  }
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Mon Mar 12 18:25:45 2012
>> @@ -26,6 +26,7 @@ import com.google.common.collect.Lists;
>>  import com.google.common.collect.Maps;
>>  import com.google.common.io.Closeables;
>>  import org.apache.hadoop.conf.Configuration;
>> +import org.apache.hadoop.fs.FileStatus;
>>  import org.apache.hadoop.fs.FileSystem;
>>  import org.apache.hadoop.fs.Path;
>>  import org.apache.hadoop.io.IntWritable;
>> @@ -38,6 +39,7 @@ import org.apache.hadoop.util.ToolRunner
>>  import org.apache.mahout.clustering.AbstractCluster;
>>  import org.apache.mahout.clustering.ClusterObservations;
>>  import org.apache.mahout.clustering.ClusteringTestUtils;
>> +import org.apache.mahout.clustering.canopy.Canopy;
>>  import org.apache.mahout.clustering.canopy.CanopyDriver;
>>  import org.apache.mahout.clustering.classify.WeightedVectorWritable;
>>  import org.apache.mahout.common.DummyOutputCollector;
>> @@ -486,6 +488,42 @@ public final class TestKmeansClustering
>>      // now run the Canopy job
>>      CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);
>> 
>> +    DummyOutputCollector<Text, Canopy>  collector1 =
>> +        new DummyOutputCollector<Text, Canopy>();
>> +
>> +    FileStatus[] outParts = FileSystem.get(conf).globStatus(
>> +                    new Path(outputPath, "clusters-0-final/*-0*"));
>> +    for (FileStatus outPartStat : outParts) {
>> +      for (Pair<Text,Canopy>  record :
>> +               new SequenceFileIterable<Text,Canopy>(
>> +                 outPartStat.getPath(), conf)) {
>> +          collector1.collect(record.getFirst(), record.getSecond());
>> +      }
>> +    }
>> +
>> +    boolean got15 = false;
>> +    boolean got43 = false;
>> +    int count = 0;
>> +    for (Text k : collector1.getKeys()) {
>> +      count++;
>> +      List<Canopy>  vl = collector1.getValue(k);
>> +      assertEquals("non-singleton centroid!", 1, vl.size());
>> +      Vector v = vl.get(0).getCenter();
>> +      assertEquals("cetriod vector is wrong length", 2, v.size());
>> +      if ( (Math.abs(v.get(0) - 1.5) < EPSILON)
>> +           && (Math.abs(v.get(1) - 1.5) < EPSILON)
>> +           && !got15) {
>> +        got15 = true;
>> +      } else if ( (Math.abs(v.get(0) - 4.333333333333334) < EPSILON)
>> +                  && (Math.abs(v.get(1) - 4.333333333333334) < EPSILON)
>> +                  && !got43) {
>> +        got43 = true;
>> +      } else {
>> +        assertTrue("got unexpected center: "+v+" ["+v.getClass().toString()+"]", false);
>> +      }
>> +    }
>> +    assertEquals("got unexpected number of centers", 2, count);
>> +
>>      // now run the KMeans job
>>      KMeansDriver.run(pointsPath, new Path(outputPath, "clusters-0-final"), outputPath, new EuclideanDistanceMeasure(),
>>          0.001, 10, true, false);
>> @@ -500,7 +538,28 @@ public final class TestKmeansClustering
>>        collector.collect(record.getFirst(), record.getSecond());
>>      }
>> 
>> -    assertEquals("num points[0]", 4, collector.getValue(new IntWritable(0)).size());
>> -    assertEquals("num points[1]", 5, collector.getValue(new IntWritable(1)).size());
>> +    boolean gotLowClust = false;  // clusters should be [1, *] and [2, *]
>> +    boolean gotHighClust = false; // vs [3 , *],  [4 , *] and [5, *]
>> +    for (IntWritable k : collector.getKeys()) {
>> +      List<WeightedVectorWritable>  wvList = collector.getValue(k);
>> +      assertTrue("empty cluster!", wvList.size() != 0);
>> +      if (wvList.get(0).getVector().get(0)<= 2.0) {
>> +        for (WeightedVectorWritable wv : wvList) {
>> +          Vector v = wv.getVector();
>> +          int idx = v.maxValueIndex();
>> +          assertTrue("bad cluster!", v.get(idx)<= 2.0);
>> +        }
>> +        assertEquals("Wrong size cluster", 4, wvList.size());
>> +        gotLowClust= true;
>> +      } else {
>> +        for (WeightedVectorWritable wv : wvList) {
>> +          Vector v = wv.getVector();
>> +          int idx = v.minValueIndex();
>> +          assertTrue("bad cluster!", v.get(idx)>  2.0);
>> +        }
>> +        assertEquals("Wrong size cluster", 5, wvList.size());
>> +        gotHighClust= true;
>> +      }
>> +    }
>>    }
>>  }
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Mon Mar 12 18:25:45 2012
>> @@ -21,10 +21,12 @@ import java.util.Collection;
>>  import java.util.Iterator;
>>  import java.util.List;
>>  import java.util.Map;
>> +import java.util.Random;
>> 
>>  import com.google.common.collect.Lists;
>>  import com.google.common.collect.Maps;
>>  import org.apache.hadoop.conf.Configuration;
>> +import org.apache.hadoop.fs.FileStatus;
>>  import org.apache.hadoop.fs.FileSystem;
>>  import org.apache.hadoop.fs.Path;
>>  import org.apache.hadoop.io.Text;
>> @@ -350,7 +352,13 @@ public final class TestMeanShift extends
>>      Configuration conf = new Configuration();
>>      FileSystem fs = FileSystem.get(input.toUri(), conf);
>>      Collection<VectorWritable>  points = Lists.newArrayList();
>> -    for (Vector v : raw) {
>> +    Random r = new Random(123);
>> +    Vector[] permutedRaw = new Vector[raw.length];
>> +    for (int i = 0; i<  raw.length; i++)
>> +      permutedRaw = raw;
>> +    for (int i = 0; i<  permutedRaw.length; i++)
>> +      permutedRaw[i] = permutedRaw[i + r.nextInt(raw.length - i)];
>> +    for (Vector v : permutedRaw) {
>>        points.add(new VectorWritable(v));
>>      }
>>      ClusteringTestUtils.writePointsToFile(points,
>> @@ -376,10 +384,12 @@ public final class TestMeanShift extends
>>          optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
>>          optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
>>      ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
>> -    Path outPart = new Path(output, "clusters-4-final/part-r-00000");
>> -    long count = HadoopUtil.countRecords(outPart, conf);
>> -    assertEquals("count", 3, count);
>> -    outPart = new Path(output, "clusters-0/part-m-00000");
>> +    FileStatus[] outParts = FileSystem.get(conf).globStatus(
>> +        new Path(output, "clusters-?-final/part-r-*"));
>> +    assertEquals("Wrong number of matching final parts", 1, outParts.length);
>> +    long count = HadoopUtil.countRecords(outParts[0].getPath(), conf);
>> +    assertEquals("count", 5, count);
>> +    Path outPart = new Path(output, "clusters-0/part-m-00000");
>>      Iterator<?>  iterator = new SequenceFileValueIterator<Writable>(outPart,
>>          true, conf);
>>      // now test the initial clusters to ensure the type of their centers has
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java Mon Mar 12 18:25:45 2012
>> @@ -1,26 +0,0 @@
>> -/**
>> - * Licensed to the Apache Software Foundation (ASF) under one
>> - * or more contributor license agreements. See the NOTICE file
>> - * distributed with this work for additional information
>> - * regarding copyright ownership. The ASF licenses this file
>> - * to you under the Apache License, Version 2.0 (the
>> - * "License"); you may not use this file except in compliance
>> - * with the License. You may obtain a copy of the License at
>> - *
>> - * http://www.apache.org/licenses/LICENSE-2.0
>> - *
>> - * Unless required by applicable law or agreed to in writing,
>> - * software distributed under the License is distributed on an
>> - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> - * KIND, either express or implied. See the License for the
>> - * specific language governing permissions and limitations
>> - * under the License.
>> - */
>> -
>> -package org.apache.mahout.common;
>> -
>> -import org.apache.hadoop.mapreduce.Counter;
>> -
>> -final class DummyCounter extends Counter {
>> -
>> -}
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Mar 12 18:25:45 2012
>> @@ -17,16 +17,21 @@
>> 
>>  package org.apache.mahout.common;
>> 
>> +import com.google.common.collect.Lists;
>> +
>>  import java.io.IOException;
>> +import java.lang.reflect.Constructor;
>> +import java.lang.reflect.Method;
>>  import java.util.List;
>>  import java.util.Map;
>>  import java.util.Set;
>>  import java.util.TreeMap;
>> 
>> -import com.google.common.collect.Lists;
>>  import org.apache.hadoop.conf.Configuration;
>> +import org.apache.hadoop.mapreduce.MapContext;
>>  import org.apache.hadoop.mapreduce.Mapper;
>>  import org.apache.hadoop.mapreduce.RecordWriter;
>> +import org.apache.hadoop.mapreduce.ReduceContext;
>>  import org.apache.hadoop.mapreduce.Reducer;
>>  import org.apache.hadoop.mapreduce.TaskAttemptContext;
>>  import org.apache.hadoop.mapreduce.TaskAttemptID;
>> @@ -65,7 +70,18 @@ public final class DummyRecordWriter<K,
>>                                                                        Configuration configuration,
>>                                                                        RecordWriter<K2, V2>  output)
>>      throws IOException, InterruptedException {
>> -    return mapper.new Context(configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
>> +
>> +    // Use reflection since the context types changed incompatibly between 0.20
>> +    // and 0.23.
>> +    try {
>> +      return buildNewMapperContext(configuration, output);
>> +    } catch (Exception e) {
>> +      try {
>> +        return buildOldMapperContext(mapper, configuration, output);
>> +      } catch (Exception ex) {
>> +        throw new IllegalStateException(ex);
>> +      }
>> +    }
>>    }
>> 
>>    public static<K1, V1, K2, V2>  Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2>  reducer,
>> @@ -74,17 +90,96 @@ public final class DummyRecordWriter<K,
>>                                                                         Class<K1>  keyClass,
>>                                                                         Class<V1>  valueClass)
>>      throws IOException, InterruptedException {
>> -    return reducer.new Context(configuration,
>> -                               new TaskAttemptID(),
>> -                               new MockIterator(),
>> -                               null,
>> -                               null,
>> -                               output,
>> -                               null,
>> -                               new DummyStatusReporter(),
>> -                               null,
>> -                               keyClass,
>> -                               valueClass);
>> +
>> +    // Use reflection since the context types changed incompatibly between 0.20
>> +    // and 0.23.
>> +    try {
>> +      return buildNewReducerContext(configuration, output, keyClass, valueClass);
>> +    } catch (Exception e) {
>> +      try {
>> +        return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
>> +      } catch (Exception ex) {
>> +        throw new IllegalStateException(ex);
>> +      }
>> +    }
>> +  }
>> +
>> +  @SuppressWarnings({ "unchecked", "rawtypes" })
>> +  private static<K1, V1, K2, V2>  Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
>> +      Configuration configuration, RecordWriter<K2, V2>  output) throws Exception {
>> +    Class<?>  mapContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl");
>> +    Constructor<?>  cons = mapContextImplClass.getConstructors()[0];
>> +    Object mapContextImpl = cons.newInstance(configuration,
>> +        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
>> +
>> +    Class<?>  wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
>> +    Object wrappedMapper = wrappedMapperClass.newInstance();
>> +    Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
>> +    return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
>> +  }
>> +
>> +  @SuppressWarnings({ "unchecked", "rawtypes" })
>> +  private static<K1, V1, K2, V2>  Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
>> +      Mapper<K1, V1, K2, V2>  mapper, Configuration configuration,
>> +      RecordWriter<K2, V2>  output) throws Exception {
>> +    Constructor<?>  cons = getNestedContextConstructor(mapper.getClass());
>> +    // first argument to the constructor is the enclosing instance
>> +    return (Mapper.Context) cons.newInstance(mapper, configuration,
>> +        new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
>> +  }
>> +
>> +  @SuppressWarnings({ "unchecked", "rawtypes" })
>> +  private static<K1, V1, K2, V2>  Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
>> +      Configuration configuration, RecordWriter<K2, V2>  output, Class<K1>  keyClass,
>> +      Class<V1>  valueClass) throws Exception {
>> +    Class<?>  reduceContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl");
>> +    Constructor<?>  cons = reduceContextImplClass.getConstructors()[0];
>> +    Object reduceContextImpl = cons.newInstance(configuration,
>> +      new TaskAttemptID(),
>> +      new MockIterator(),
>> +      null,
>> +      null,
>> +      output,
>> +      null,
>> +      new DummyStatusReporter(),
>> +      null,
>> +      keyClass,
>> +      valueClass);
>> +
>> +    Class<?>  wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
>> +    Object wrappedReducer = wrappedReducerClass.newInstance();
>> +    Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
>> +    return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
>> +  }
>> +
>> +  @SuppressWarnings({ "unchecked", "rawtypes" })
>> +  private static<K1, V1, K2, V2>  Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
>> +      Reducer<K1, V1, K2, V2>  reducer, Configuration configuration,
>> +      RecordWriter<K2, V2>  output, Class<K1>  keyClass,
>> +      Class<V1>  valueClass) throws Exception {
>> +    Constructor<?>  cons = getNestedContextConstructor(reducer.getClass());
>> +    // first argument to the constructor is the enclosing instance
>> +    return (Reducer.Context) cons.newInstance(reducer,
>> +        configuration,
>> +        new TaskAttemptID(),
>> +        new MockIterator(),
>> +        null,
>> +        null,
>> +        output,
>> +        null,
>> +        new DummyStatusReporter(),
>> +        null,
>> +        keyClass,
>> +        valueClass);
>> +  }
>> +
>> +  private static Constructor<?>  getNestedContextConstructor(Class<?>  outerClass) {
>> +    for (Class<?>  nestedClass : outerClass.getClasses()) {
>> +      if ("Context".equals(nestedClass.getSimpleName())) {
>> +        return nestedClass.getConstructors()[0];
>> +      }
>> +    }
>> +    throw new IllegalStateException("Cannot find context class for " + outerClass);
>>    }
>> 
>>  }
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Mon Mar 12 18:25:45 2012
>> @@ -19,6 +19,8 @@
>> 
>>  package org.apache.mahout.common;
>> 
>> +import static org.easymock.EasyMock.createMockBuilder;
>> +
>>  import java.util.Map;
>> 
>>  import com.google.common.collect.Maps;
>> @@ -30,10 +32,21 @@ public final class DummyStatusReporter e
>>    private final Map<Enum<?>, Counter>  counters = Maps.newHashMap();
>>    private final Map<String, Counter>  counterGroups = Maps.newHashMap();
>> 
>> +  private Counter newCounter() {
>> +    try {
>> +      // 0.23 case
>> +      String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
>> +      return (Counter) createMockBuilder(Class.forName(c)).createMock();
>> +    } catch (ClassNotFoundException e) {
>> +      // 0.20 case
>> +      return createMockBuilder(Counter.class).createMock();
>> +    }
>> +  }
>> +
>>    @Override
>>    public Counter getCounter(Enum<?>  name) {
>>      if (!counters.containsKey(name)) {
>> -      counters.put(name, new DummyCounter());
>> +      counters.put(name, newCounter());
>>      }
>>      return counters.get(name);
>>    }
>> @@ -42,7 +55,7 @@ public final class DummyStatusReporter e
>>    @Override
>>    public Counter getCounter(String group, String name) {
>>      if (!counterGroups.containsKey(group + name)) {
>> -      counterGroups.put(group + name, new DummyCounter());
>> +      counterGroups.put(group + name, newCounter());
>>      }
>>      return counterGroups.get(group+name);
>>    }
>> @@ -55,4 +68,8 @@ public final class DummyStatusReporter e
>>    public void setStatus(String status) {
>>    }
>> 
>> +  public float getProgress() {
>> +    return 0;
>> +  }
>> +
>>  }
>> 
>> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original)
>> +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Mon Mar 12 18:25:45 2012
>> @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileStatus;
>>  import org.apache.hadoop.fs.FileSystem;
>>  import org.apache.hadoop.fs.Path;
>>  import org.apache.mahout.clustering.ClusteringTestUtils;
>> +import org.apache.mahout.common.HadoopUtil;
>>  import org.apache.mahout.common.MahoutTestCase;
>>  import org.apache.mahout.common.iterator.sequencefile.PathFilters;
>>  import org.apache.mahout.math.DenseVector;
>> @@ -254,14 +255,14 @@ public final class TestDistributedRowMat
>> 
>>      deleteContentsOfPath(conf, outputPath);
>> 
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      Vector result1 = dm.times(v);
>> 
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      deleteContentsOfPath(conf, outputPath);
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
>>      dm.setConf(conf);
>> @@ -291,14 +292,14 @@ public final class TestDistributedRowMat
>> 
>>      deleteContentsOfPath(conf, outputPath);
>> 
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      Vector result1 = dm.timesSquared(v);
>> 
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      deleteContentsOfPath(conf, outputPath);
>> -    assertEquals(0, fs.listStatus(outputPath).length);
>> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>> 
>>      conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
>>      dm.setConf(conf);
>> @@ -325,7 +326,7 @@ public final class TestDistributedRowMat
>>    private static void deleteContentsOfPath(Configuration conf, Path path) throws Exception {
>>      FileSystem fs = path.getFileSystem(conf);
>> 
>> -    FileStatus[] statuses = fs.listStatus(path);
>> +    FileStatus[] statuses = HadoopUtil.listStatus(fs, path);
>>      for (FileStatus status : statuses) {
>>        fs.delete(status.getPath(), true);
>>      }
>> 
>> Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
>> URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
>> +++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Mon Mar 12 18:25:45 2012
>> @@ -193,7 +193,7 @@ public final class TestClusterDumper ext
>>          output, measure, 8, 4, true, 0.0, true);
>>      // run ClusterDumper
>>      ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
>> -        "clusters-0"), new Path(output, "clusteredPoints"));
>> +        "clusters-0-final"), new Path(output, "clusteredPoints"));
>>      clusterDumper.printClusters(termDictionary);
>>    }
>> 
>> 
>> Modified: mahout/trunk/pom.xml
>> URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
>> ==============================================================================
>> --- mahout/trunk/pom.xml (original)
>> +++ mahout/trunk/pom.xml Mon Mar 12 18:25:45 2012
>> @@ -107,6 +107,17 @@
>>      <url>https://issues.apache.org/jira/browse/MAHOUT</url>
>>    </issueManagement>
>> 
>> +<repositories>
>> +<repository>
>> +<id>apache.snapshots</id>
>> +<name>Apache Snapshot Repository</name>
>> +<url>http://repository.apache.org/snapshots</url>
>> +<releases>
>> +<enabled>false</enabled>
>> +</releases>
>> +</repository>
>> +</repositories>
>> +
>>    <dependencyManagement>
>>      <dependencies>
>> 
>> @@ -264,6 +275,100 @@
>>          </exclusions>
>>        </dependency>
>>        <dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-common</artifactId>
>> +<version>${hadoop.version}</version>
>> +<exclusions>
>> +<exclusion>
>> +<groupId>net.sf.kosmosfs</groupId>
>> +<artifactId>kfs</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.mortbay.jetty</groupId>
>> +<artifactId>jetty</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.mortbay.jetty</groupId>
>> +<artifactId>jetty-util</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>hsqldb</groupId>
>> +<artifactId>hsqldb</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>commons-el</groupId>
>> +<artifactId>commons-el</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>junit</groupId>
>> +<artifactId>junit</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>oro</groupId>
>> +<artifactId>oro</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.mortbay.jetty</groupId>
>> +<artifactId>jsp-2.1</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.mortbay.jetty</groupId>
>> +<artifactId>jsp-api-2.1</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.mortbay.jetty</groupId>
>> +<artifactId>servlet-api-2.5</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>commons-net</groupId>
>> +<artifactId>commons-net</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>tomcat</groupId>
>> +<artifactId>jasper-runtime</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>tomcat</groupId>
>> +<artifactId>jasper-compiler</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>xmlenc</groupId>
>> +<artifactId>xmlenc</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>net.java.dev.jets3t</groupId>
>> +<artifactId>jets3t</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.eclipse.jdt</groupId>
>> +<artifactId>core</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.slf4j</groupId>
>> +<artifactId>slf4j-api</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.slf4j</groupId>
>> +<artifactId>slf4j-jcl</artifactId>
>> +</exclusion>
>> +<exclusion>
>> +<groupId>org.slf4j</groupId>
>> +<artifactId>slf4j-log4j12</artifactId>
>> +</exclusion>
>> +</exclusions>
>> +</dependency>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-mapreduce-client-core</artifactId>
>> +<version>${hadoop.version}</version>
>> +</dependency>
>> +<dependency>
>> +<groupId>org.apache.hadoop</groupId>
>> +<artifactId>hadoop-mapreduce-client-common</artifactId>
>> +<version>${hadoop.version}</version>
>> +</dependency>
>> +
>> +<dependency>
>>          <groupId>org.codehaus.jackson</groupId>
>>          <artifactId>jackson-core-asl</artifactId>
>>          <version>1.8.2</version>
>> 
>> 
> 

--------------------------------------------
Grant Ingersoll
http://www.lucidimagination.com

Re: svn commit: r1299770 - in /mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/common/ core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/ core/src/test/java/org/apache/mahout/clustering/canopy/ core/src/test/java/org/apache/m...

Posted by tom pierce <tc...@apache.org>.

Can someone hook me up with JIRA privileges so I can close tickets?

(Or, if that isn't something all committers get, could someone please mark
MAHOUT-822 and MAHOUT-980 closed?)

-tom

On 03/12/2012 02:25 PM, tcp@apache.org wrote:
> Author: tcp
> Date: Mon Mar 12 18:25:45 2012
> New Revision: 1299770
>
> URL: http://svn.apache.org/viewvc?rev=1299770&view=rev
> Log:
> MAHOUT-822: Make Mahout compatible with Hadoop 0.23.1.
>
> Modified:
>      mahout/trunk/core/pom.xml
>      mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
>      mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
>      mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
>      mahout/trunk/pom.xml
>
> Modified: mahout/trunk/core/pom.xml
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/pom.xml (original)
> +++ mahout/trunk/core/pom.xml Mon Mar 12 18:25:45 2012
> @@ -140,10 +140,6 @@
>
>       <!-- Third Party -->
>       <dependency>
> -<groupId>org.apache.hadoop</groupId>
> -<artifactId>hadoop-core</artifactId>
> -</dependency>
> -<dependency>
>         <groupId>org.codehaus.jackson</groupId>
>         <artifactId>jackson-core-asl</artifactId>
>       </dependency>
> @@ -211,4 +207,43 @@
>       </dependency>
>
>     </dependencies>
> +
> +<profiles>
> +<profile>
> +<id>hadoop-0.20</id>
> +<activation>
> +<property>
> +<name>!hadoop.version</name>
> +</property>
> +</activation>
> +<dependencies>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-core</artifactId>
> +</dependency>
> +</dependencies>
> +</profile>
> +<profile>
> +<id>hadoop-0.23</id>
> +<activation>
> +<property>
> +<name>hadoop.version</name>
> +</property>
> +</activation>
> +<dependencies>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-common</artifactId>
> +</dependency>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-mapreduce-client-common</artifactId>
> +</dependency>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-mapreduce-client-core</artifactId>
> +</dependency>
> +</dependencies>
> +</profile>
> +</profiles>
>   </project>
>
> Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
> +++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Mon Mar 12 18:25:45 2012
> @@ -17,6 +17,7 @@
>
>   package org.apache.mahout.common;
>
> +import java.io.FileNotFoundException;
>   import java.io.IOException;
>   import java.io.InputStream;
>   import java.net.URI;
> @@ -229,9 +230,9 @@ public final class HadoopUtil {
>       FileStatus[] statuses;
>       FileSystem fs = path.getFileSystem(conf);
>       if (filter == null) {
> -      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : fs.listStatus(path);
> +      statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
>       } else {
> -      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : fs.listStatus(path, filter);
> +      statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
>       }
>       if (ordering != null) {
>         Arrays.sort(statuses, ordering);
> @@ -239,6 +240,22 @@ public final class HadoopUtil {
>       return statuses;
>     }
>
> +  public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
> +    try {
> +      return fs.listStatus(path);
> +    } catch (FileNotFoundException e) {
> +      return new FileStatus[0];
> +    }
> +  }
> +
> +  public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter) throws IOException {
> +    try {
> +      return fs.listStatus(path, filter);
> +    } catch (FileNotFoundException e) {
> +      return new FileStatus[0];
> +    }
> +  }
> +
>     public static void cacheFiles(Path fileToCache, Configuration conf) {
>       DistributedCache.setCacheFiles(new URI[]{fileToCache.toUri()}, conf);
>     }
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java Mon Mar 12 18:25:45 2012
> @@ -1,70 +0,0 @@
> -/**
> - * Licensed to the Apache Software Foundation (ASF) under one or more
> - * contributor license agreements.  See the NOTICE file distributed with
> - * this work for additional information regarding copyright ownership.
> - * The ASF licenses this file to You under the Apache License, Version 2.0
> - * (the "License"); you may not use this file except in compliance with
> - * the License.  You may obtain a copy of the License at
> - *
> - *     http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -package org.apache.mahout.classifier.df.mapreduce.partial;
> -
> -import java.io.IOException;
> -
> -import org.apache.hadoop.conf.Configuration;
> -import org.apache.hadoop.mapreduce.Mapper;
> -import org.apache.hadoop.mapreduce.TaskAttemptID;
> -import org.apache.hadoop.mapreduce.Mapper.Context;
> -import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
> -
> -/**
> - * Special implementation that collects the output of the mappers
> - */
> -final class MockContext extends Context {
> -
> -  private final TreeID[] keys;
> -  private final MapredOutput[] values;
> -  private int index;
> -
> -  MockContext(Mapper<?,?,?,?>  mapper, Configuration conf, TaskAttemptID taskid, int nbTrees)
> -    throws IOException, InterruptedException {
> -    mapper.super(conf, taskid, null, null, null, null, null);
> -
> -    keys = new TreeID[nbTrees];
> -    values = new MapredOutput[nbTrees];
> -  }
> -
> -  @Override
> -  public void write(Object key, Object value) throws IOException {
> -    if (index == keys.length) {
> -      throw new IOException("Received more output than expected : " + index);
> -    }
> -
> -    keys[index] = ((TreeID) key).clone();
> -    values[index] = ((MapredOutput) value).clone();
> -
> -    index++;
> -  }
> -
> -  /**
> -   * @return number of outputs collected
> -   */
> -  public int nbOutputs() {
> -    return index;
> -  }
> -
> -  public TreeID[] getKeys() {
> -    return keys;
> -  }
> -
> -  public MapredOutput[] getValues() {
> -    return values;
> -  }
> -}
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java Mon Mar 12 18:25:45 2012
> @@ -1,176 +0,0 @@
> -/**
> - * Licensed to the Apache Software Foundation (ASF) under one or more
> - * contributor license agreements.  See the NOTICE file distributed with
> - * this work for additional information regarding copyright ownership.
> - * The ASF licenses this file to You under the Apache License, Version 2.0
> - * (the "License"); you may not use this file except in compliance with
> - * the License.  You may obtain a copy of the License at
> - *
> - *     http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -
> -package org.apache.mahout.classifier.df.mapreduce.partial;
> -
> -import java.io.IOException;
> -import java.util.List;
> -
> -import org.apache.commons.lang.ArrayUtils;
> -import org.apache.hadoop.conf.Configuration;
> -import org.apache.hadoop.fs.Path;
> -import org.apache.hadoop.io.LongWritable;
> -import org.apache.hadoop.io.Text;
> -import org.apache.hadoop.mapreduce.InputSplit;
> -import org.apache.hadoop.mapreduce.Job;
> -import org.apache.hadoop.mapreduce.RecordReader;
> -import org.apache.hadoop.mapreduce.TaskAttemptContext;
> -import org.apache.hadoop.mapreduce.TaskAttemptID;
> -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> -import org.apache.mahout.classifier.df.DFUtils;
> -import org.apache.mahout.classifier.df.DecisionForest;
> -import org.apache.mahout.classifier.df.builder.TreeBuilder;
> -import org.apache.mahout.classifier.df.data.Dataset;
> -import org.apache.mahout.classifier.df.mapreduce.Builder;
> -import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
> -import org.apache.mahout.classifier.df.node.Node;
> -import org.slf4j.Logger;
> -import org.slf4j.LoggerFactory;
> -
> -import com.google.common.collect.Lists;
> -
> -/**
> - * Simulates the Partial mapreduce implementation in a sequential manner. Must
> - * receive a seed
> - */
> -public class PartialSequentialBuilder extends PartialBuilder {
> -
> -  private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
> -
> -  private MockContext firstOutput;
> -
> -  private final Dataset dataset;
> -
> -  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
> -      Dataset dataset, long seed, Configuration conf) {
> -    super(treeBuilder, dataPath, new Path("notUsed"), seed, conf);
> -    this.dataset = dataset;
> -  }
> -
> -  public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
> -      Dataset dataset, long seed) {
> -    this(treeBuilder, dataPath, dataset, seed, new Configuration());
> -  }
> -
> -  @Override
> -  protected void configureJob(Job job)
> -      throws IOException {
> -    Configuration conf = job.getConfiguration();
> -
> -    int num = conf.getInt("mapred.map.tasks", -1);
> -
> -    super.configureJob(job);
> -
> -    // PartialBuilder sets the number of maps to 1 if we are running in 'local'
> -    conf.setInt("mapred.map.tasks", num);
> -  }
> -
> -  @Override
> -  protected boolean runJob(Job job) throws IOException, InterruptedException {
> -    Configuration conf = job.getConfiguration();
> -
> -    // retrieve the splits
> -    TextInputFormat input = new TextInputFormat();
> -    List<InputSplit>  splits = input.getSplits(job);
> -
> -    int nbSplits = splits.size();
> -    log.debug("Nb splits : {}", nbSplits);
> -
> -    InputSplit[] sorted = new InputSplit[nbSplits];
> -    splits.toArray(sorted);
> -    Builder.sortSplits(sorted);
> -
> -    int numTrees = Builder.getNbTrees(conf); // total number of trees
> -
> -    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
> -
> -    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);
> -
> -    /* first instance id in hadoop's order */
> -    //int[] firstIds = new int[nbSplits];
> -    /* partitions' sizes in hadoop order */
> -    int[] sizes = new int[nbSplits];
> -
> -    // to compute firstIds, process the splits in file order
> -    long slowest = 0; // duration of slowest map
> -    int firstId = 0;
> -    for (InputSplit split : splits) {
> -      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
> -
> -      RecordReader<LongWritable, Text>  reader = input.createRecordReader(split, task);
> -      reader.initialize(split, task);
> -
> -      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
> -                                               hp, nbSplits, numTrees);
> -
> -      long time = System.currentTimeMillis();
> -
> -      //firstIds[hp] = firstId;
> -
> -      while (reader.nextKeyValue()) {
> -        mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
> -        firstId++;
> -        sizes[hp]++;
> -      }
> -
> -      mapper.cleanup(firstOutput);
> -
> -      time = System.currentTimeMillis() - time;
> -      log.info("Duration : {}", DFUtils.elapsedTime(time));
> -
> -      if (time>  slowest) {
> -        slowest = time;
> -      }
> -    }
> -
> -    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
> -    return true;
> -  }
> -
> -  @Override
> -  protected DecisionForest parseOutput(Job job) throws IOException {
> -    return processOutput(firstOutput.getKeys(), firstOutput.getValues());
> -  }
> -
> -  /**
> -   * extract the decision forest
> -   */
> -  protected static DecisionForest processOutput(TreeID[] keys, MapredOutput[] values) {
> -    List<Node>  trees = Lists.newArrayList();
> -
> -    for (int index = 0; index<  keys.length; index++) {
> -      MapredOutput value = values[index];
> -      trees.add(value.getTree());
> -    }
> -
> -    return new DecisionForest(trees);
> -  }
> -
> -  /**
> -   * Special Step1Mapper that can be configured without using a Configuration
> -   *
> -   */
> -  private static class MockStep1Mapper extends Step1Mapper {
> -    protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
> -        int partition, int numMapTasks, int numTrees) {
> -      configure(false, treeBuilder, dataset);
> -      configure(seed, partition, numMapTasks, numTrees);
> -    }
> -
> -  }
> -
> -}
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java Mon Mar 12 18:25:45 2012
> @@ -17,21 +17,30 @@
>
>   package org.apache.mahout.classifier.df.mapreduce.partial;
>
> +import static org.easymock.EasyMock.anyObject;
> +import static org.easymock.EasyMock.capture;
> +import static org.easymock.EasyMock.createMock;
> +import static org.easymock.EasyMock.expectLastCall;
> +import static org.easymock.EasyMock.replay;
> +import static org.easymock.EasyMock.verify;
> +
>   import java.util.Random;
>
> -import org.apache.hadoop.conf.Configuration;
>   import org.apache.hadoop.io.LongWritable;
>   import org.apache.hadoop.io.Text;
> -import org.apache.hadoop.mapreduce.TaskAttemptID;
> -import org.apache.mahout.common.MahoutTestCase;
> +import org.apache.hadoop.mapreduce.Mapper;
>   import org.apache.mahout.common.RandomUtils;
>   import org.apache.mahout.classifier.df.builder.TreeBuilder;
>   import org.apache.mahout.classifier.df.data.Data;
>   import org.apache.mahout.classifier.df.data.DataLoader;
>   import org.apache.mahout.classifier.df.data.Dataset;
>   import org.apache.mahout.classifier.df.data.Utils;
> +import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
>   import org.apache.mahout.classifier.df.node.Leaf;
>   import org.apache.mahout.classifier.df.node.Node;
> +import org.apache.mahout.common.MahoutTestCase;
> +import org.easymock.Capture;
> +import org.easymock.CaptureType;
>   import org.junit.Test;
>
>   public final class Step1MapperTest extends MahoutTestCase {
> @@ -71,6 +80,17 @@ public final class Step1MapperTest exten
>       }
>     }
>
> +  /**
> +   * Capture that keeps every {@link TreeID} handed to the mocked call
> +   * ({@link CaptureType#ALL}) and stores a clone of each key instead of the
> +   * instance it was given.
> +   */
> +  private static class TreeIDCapture extends Capture<TreeID> {
> +
> +    public TreeIDCapture() {
> +      super(CaptureType.ALL);
> +    }
> +
> +    /**
> +     * Records a copy of {@code value}.
> +     * NOTE(review): the clone is presumably needed because the mapper reuses
> +     * one key instance across writes -- confirm against Step1Mapper.
> +     */
> +    @Override
> +    public void setValue(final TreeID value) {
> +      super.setValue(value.clone());
> +    }
> +  }
> +
>     /** nb attributes per generated data instance */
>     static final int NUM_ATTRIBUTES = 4;
>
> @@ -83,6 +103,7 @@ public final class Step1MapperTest exten
>     /** nb mappers to use */
>     static final int NUM_MAPPERS = 2;
>
> +  @SuppressWarnings({ "rawtypes", "unchecked" })
>     @Test
>     public void testMapper() throws Exception {
>       Long seed = null;
> @@ -109,8 +130,13 @@ public final class Step1MapperTest exten
>         // expected number of trees that this mapper will build
>         int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);
>
> -      MockContext context = new MockContext(new Step1Mapper(),
> -          new Configuration(), new TaskAttemptID(), mapNbTrees);
> +      Mapper.Context context =
> +        createMock(Mapper.Context.class);
> +      Capture<TreeID>  capturedKeys = new TreeIDCapture();
> +      context.write(capture(capturedKeys), anyObject());
> +      expectLastCall().anyTimes();
> +
> +      replay(context);
>
>         MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed,
>             partition, NUM_MAPPERS, NUM_TREES);
> @@ -125,12 +151,13 @@ public final class Step1MapperTest exten
>         }
>
>         mapper.cleanup(context);
> +      verify(context);
>
>         // make sure the mapper built all its trees
> -      assertEquals(mapNbTrees, context.nbOutputs());
> +      assertEquals(mapNbTrees, capturedKeys.getValues().size());
>
>         // check the returned keys
> -      for (TreeID k : context.getKeys()) {
> +      for (TreeID k : capturedKeys.getValues()) {
>           assertEquals(partition, k.partition());
>           assertEquals(treeIndex, k.treeId());
>
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Mar 12 18:25:45 2012
> @@ -34,6 +34,7 @@ import org.apache.mahout.clustering.Clus
>   import org.apache.mahout.common.DummyRecordWriter;
>   import org.apache.mahout.common.HadoopUtil;
>   import org.apache.mahout.common.MahoutTestCase;
> +import org.apache.mahout.common.Pair;
>   import org.apache.mahout.common.commandline.DefaultOptionCreator;
>   import org.apache.mahout.common.distance.DistanceMeasure;
>   import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
> @@ -126,8 +127,8 @@ public final class TestCanopyCreation ex
>         int[] expectedNumPoints = { 4, 4, 3 };
>         double[][] expectedCentroids = { { 1.5, 1.5 }, { 4.0, 4.0 },
>             { 4.666666666666667, 4.6666666666666667 } };
> -      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
> -          testCanopy.getNumObservations());
> +      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
> +                   expectedNumPoints[canopyIx]);
>         double[] refCentroid = expectedCentroids[canopyIx];
>         Vector testCentroid = testCanopy.computeCentroid();
>         for (int pointIx = 0; pointIx<  refCentroid.length; pointIx++) {
> @@ -151,8 +152,8 @@ public final class TestCanopyCreation ex
>           { 4.666666666666667, 4.666666666666667 } };
>       for (int canopyIx = 0; canopyIx<  referenceEuclidean.size(); canopyIx++) {
>         Canopy testCanopy = referenceEuclidean.get(canopyIx);
> -      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
> -          testCanopy.getNumObservations());
> +      assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(),
> +                   expectedNumPoints[canopyIx]);
>         double[] refCentroid = expectedCentroids[canopyIx];
>         Vector testCentroid = testCanopy.computeCentroid();
>         for (int pointIx = 0; pointIx<  refCentroid.length; pointIx++) {
> @@ -328,20 +329,36 @@ public final class TestCanopyCreation ex
>         Canopy canopy = new Canopy();
>         assertTrue("more to come", reader.next(key, canopy));
>         assertEquals("1st key", "C-0", key.toString());
> -      assertEquals("1st x value", 1.5, canopy.getCenter().get(0), EPSILON);
> -      assertEquals("1st y value", 1.5, canopy.getCenter().get(1), EPSILON);
> +
> +      List<Pair<Double,Double>>  refCenters = Lists.newArrayList();
> +      refCenters.add(new Pair<Double,Double>(1.5,1.5));
> +      refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
> +      Pair<Double,Double>  c = new Pair<Double,Double>(canopy.getCenter().get(0),
> +                                                      canopy.getCenter().get(1));
> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>         assertTrue("more to come", reader.next(key, canopy));
>         assertEquals("2nd key", "C-1", key.toString());
> -      assertEquals("2nd x value", 4.333333333333334, canopy.getCenter().get(0),
> -          EPSILON);
> -      assertEquals("2nd y value", 4.333333333333334, canopy.getCenter().get(1),
> -          EPSILON);
> +      c = new Pair<Double,Double>(canopy.getCenter().get(0),
> +                                  canopy.getCenter().get(1));
> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>         assertFalse("more to come", reader.next(key, canopy));
>       } finally {
>         Closeables.closeQuietly(reader);
>       }
>     }
>
> +  /**
> +   * Searches {@code list} for an entry whose two coordinates are both strictly
> +   * within {@code epsilon} of {@code target} and removes the first one found.
> +   *
> +   * @param target  the center to look for
> +   * @param list    the remaining reference centers; shrinks by one on a match
> +   * @param epsilon the exclusive per-coordinate tolerance
> +   * @return true if a matching entry was found and removed, false otherwise
> +   */
> +  boolean findAndRemove(Pair<Double,Double> target,
> +                        List<Pair<Double,Double>> list, double epsilon) {
> +    for (int i = 0; i < list.size(); i++) {
> +      Pair<Double,Double> candidate = list.get(i);
> +      boolean firstClose = Math.abs(target.getFirst() - candidate.getFirst()) < epsilon;
> +      if (firstClose && Math.abs(target.getSecond() - candidate.getSecond()) < epsilon) {
> +        // stop right after removing so the list is never traversed once modified
> +        list.remove(i);
> +        return true;
> +      }
> +    }
> +    return false;
> +  }
> +
>     /**
>      * Story: User can produce final canopy centers using a Hadoop map/reduce job
>      * and a EuclideanDistanceMeasure.
> @@ -368,14 +385,18 @@ public final class TestCanopyCreation ex
>         Canopy value = new Canopy();
>         assertTrue("more to come", reader.next(key, value));
>         assertEquals("1st key", "C-0", key.toString());
> -      assertEquals("1st x value", 1.8, value.getCenter().get(0), EPSILON);
> -      assertEquals("1st y value", 1.8, value.getCenter().get(1), EPSILON);
> +
> +      List<Pair<Double,Double>>  refCenters = Lists.newArrayList();
> +      refCenters.add(new Pair<Double,Double>(1.8,1.8));
> +      refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
> +      Pair<Double,Double>  c = new Pair<Double,Double>(value.getCenter().get(0),
> +                                                      value.getCenter().get(1));
> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>         assertTrue("more to come", reader.next(key, value));
>         assertEquals("2nd key", "C-1", key.toString());
> -      assertEquals("2nd x value", 4.433333333333334, value.getCenter().get(0),
> -          EPSILON);
> -      assertEquals("2nd y value", 4.433333333333334, value.getCenter().get(1),
> -          EPSILON);
> +      c = new Pair<Double,Double>(value.getCenter().get(0),
> +                                  value.getCenter().get(1));
> +      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
>         assertFalse("more to come", reader.next(key, value));
>       } finally {
>         Closeables.closeQuietly(reader);
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java Mon Mar 12 18:25:45 2012
> @@ -20,6 +20,9 @@ package org.apache.mahout.clustering.cla
>   import java.io.IOException;
>   import java.util.ArrayList;
>   import java.util.List;
> +import java.util.Set;
> +
> +import com.google.common.collect.Sets;
>
>   import junit.framework.Assert;
>
> @@ -195,9 +198,7 @@ public class ClusterClassificationDriver
>     }
>
>     private void assertVectorsWithOutlierRemoval() {
> -    assertFirstClusterWithOutlierRemoval();
> -    assertSecondClusterWithOutlierRemoval();
> -    assertThirdClusterWithOutlierRemoval();
> +    checkClustersWithOutlierRemoval();
>     }
>
>     private void assertVectorsWithoutOutlierRemoval() {
> @@ -230,25 +231,33 @@ public class ClusterClassificationDriver
>             "{1:1.0,0:2.0}", "{1:2.0,0:1.0}"}, vector.asFormatString()));
>       }
>     }
> -
> -  private void assertThirdClusterWithOutlierRemoval() {
> -    Assert.assertEquals(1, thirdCluster.size());
> -    for (Vector vector : thirdCluster) {
> -      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:9.0,0:9.0}"},
> -          vector.asFormatString()));
> -    }
> -  }
> -
> -  private void assertSecondClusterWithOutlierRemoval() {
> -    Assert.assertEquals(0, secondCluster.size());
> -  }
> -
> -  private void assertFirstClusterWithOutlierRemoval() {
> -    Assert.assertEquals(1, firstCluster.size());
> -    for (Vector vector : firstCluster) {
> -      Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:1.0,0:1.0}"},
> -          vector.asFormatString()));
> -    }
> +
> +  /**
> +   * Verifies the three collected clusters after outlier removal without
> +   * depending on which cluster received which vector: exactly one cluster must
> +   * be empty, and the other two must each hold a single vector whose format
> +   * string matches a distinct reference string.
> +   */
> +  private void checkClustersWithOutlierRemoval() {
> +    Set<String> expected = Sets.newHashSet("{1:9.0,0:9.0}", "{1:1.0,0:1.0}");
> +
> +    List<List<Vector>> allClusters = Lists.newArrayList();
> +    allClusters.add(firstCluster);
> +    allClusters.add(secondCluster);
> +    allClusters.add(thirdCluster);
> +
> +    int emptyCnt = 0;
> +    int singletonCnt = 0;
> +    for (List<Vector> cluster : allClusters) {
> +      if (cluster.isEmpty()) {
> +        emptyCnt++;
> +        continue;
> +      }
> +      singletonCnt++;
> +      Assert.assertTrue("expecting only singleton clusters; got size=" + cluster.size(),
> +                        cluster.size() == 1);
> +      String formatted = cluster.get(0).asFormatString();
> +      // remove() returns whether the string was still present, so a reference
> +      // string can be matched by at most one cluster
> +      Assert.assertTrue("not expecting cluster:" + formatted,
> +                        expected.remove(formatted));
> +    }
> +    Assert.assertEquals("Different number of empty clusters than expected!", 1, emptyCnt);
> +    Assert.assertEquals("Different number of singletons than expected!", 2, singletonCnt);
> +    Assert.assertEquals("Didn't match all reference clusters!", 0, expected.size());
>     }
> -
> +
>   }
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Mon Mar 12 18:25:45 2012
> @@ -26,6 +26,7 @@ import com.google.common.collect.Lists;
>   import com.google.common.collect.Maps;
>   import com.google.common.io.Closeables;
>   import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.fs.FileStatus;
>   import org.apache.hadoop.fs.FileSystem;
>   import org.apache.hadoop.fs.Path;
>   import org.apache.hadoop.io.IntWritable;
> @@ -38,6 +39,7 @@ import org.apache.hadoop.util.ToolRunner
>   import org.apache.mahout.clustering.AbstractCluster;
>   import org.apache.mahout.clustering.ClusterObservations;
>   import org.apache.mahout.clustering.ClusteringTestUtils;
> +import org.apache.mahout.clustering.canopy.Canopy;
>   import org.apache.mahout.clustering.canopy.CanopyDriver;
>   import org.apache.mahout.clustering.classify.WeightedVectorWritable;
>   import org.apache.mahout.common.DummyOutputCollector;
> @@ -486,6 +488,42 @@ public final class TestKmeansClustering
>       // now run the Canopy job
>       CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);
>
> +    DummyOutputCollector<Text, Canopy>  collector1 =
> +        new DummyOutputCollector<Text, Canopy>();
> +
> +    FileStatus[] outParts = FileSystem.get(conf).globStatus(
> +                    new Path(outputPath, "clusters-0-final/*-0*"));
> +    for (FileStatus outPartStat : outParts) {
> +      for (Pair<Text,Canopy>  record :
> +               new SequenceFileIterable<Text,Canopy>(
> +                 outPartStat.getPath(), conf)) {
> +          collector1.collect(record.getFirst(), record.getSecond());
> +      }
> +    }
> +
> +    boolean got15 = false;
> +    boolean got43 = false;
> +    int count = 0;
> +    for (Text k : collector1.getKeys()) {
> +      count++;
> +      List<Canopy>  vl = collector1.getValue(k);
> +      assertEquals("non-singleton centroid!", 1, vl.size());
> +      Vector v = vl.get(0).getCenter();
> +      assertEquals("cetriod vector is wrong length", 2, v.size());
> +      if ( (Math.abs(v.get(0) - 1.5)<  EPSILON)
> +&&  (Math.abs(v.get(1) - 1.5)<  EPSILON)
> +&&  !got15) {
> +        got15 = true;
> +      } else if ( (Math.abs(v.get(0) - 4.333333333333334)<  EPSILON)
> +&&  (Math.abs(v.get(1) - 4.333333333333334)<  EPSILON)
> +&&  !got43) {
> +        got43 = true;
> +      } else {
> +        assertTrue("got unexpected center: "+v+" ["+v.getClass().toString()+"]", false);
> +      }
> +    }
> +    assertEquals("got unexpected number of centers", 2, count);
> +
>       // now run the KMeans job
>       KMeansDriver.run(pointsPath, new Path(outputPath, "clusters-0-final"), outputPath, new EuclideanDistanceMeasure(),
>           0.001, 10, true, false);
> @@ -500,7 +538,28 @@ public final class TestKmeansClustering
>         collector.collect(record.getFirst(), record.getSecond());
>       }
>
> -    assertEquals("num points[0]", 4, collector.getValue(new IntWritable(0)).size());
> -    assertEquals("num points[1]", 5, collector.getValue(new IntWritable(1)).size());
> +    boolean gotLowClust = false;  // clusters should be [1, *] and [2, *]
> +    boolean gotHighClust = false; // vs [3 , *],  [4 , *] and [5, *]
> +    for (IntWritable k : collector.getKeys()) {
> +      List<WeightedVectorWritable>  wvList = collector.getValue(k);
> +      assertTrue("empty cluster!", wvList.size() != 0);
> +      if (wvList.get(0).getVector().get(0)<= 2.0) {
> +        for (WeightedVectorWritable wv : wvList) {
> +          Vector v = wv.getVector();
> +          int idx = v.maxValueIndex();
> +          assertTrue("bad cluster!", v.get(idx)<= 2.0);
> +        }
> +        assertEquals("Wrong size cluster", 4, wvList.size());
> +        gotLowClust= true;
> +      } else {
> +        for (WeightedVectorWritable wv : wvList) {
> +          Vector v = wv.getVector();
> +          int idx = v.minValueIndex();
> +          assertTrue("bad cluster!", v.get(idx)>  2.0);
> +        }
> +        assertEquals("Wrong size cluster", 5, wvList.size());
> +        gotHighClust= true;
> +      }
> +    }
>     }
>   }
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Mon Mar 12 18:25:45 2012
> @@ -21,10 +21,12 @@ import java.util.Collection;
>   import java.util.Iterator;
>   import java.util.List;
>   import java.util.Map;
> +import java.util.Random;
>
>   import com.google.common.collect.Lists;
>   import com.google.common.collect.Maps;
>   import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.fs.FileStatus;
>   import org.apache.hadoop.fs.FileSystem;
>   import org.apache.hadoop.fs.Path;
>   import org.apache.hadoop.io.Text;
> @@ -350,7 +352,13 @@ public final class TestMeanShift extends
>       Configuration conf = new Configuration();
>       FileSystem fs = FileSystem.get(input.toUri(), conf);
>       Collection<VectorWritable>  points = Lists.newArrayList();
> -    for (Vector v : raw) {
> +    Random r = new Random(123);
> +    Vector[] permutedRaw = new Vector[raw.length];
> +    for (int i = 0; i<  raw.length; i++)
> +      permutedRaw = raw;
> +    for (int i = 0; i<  permutedRaw.length; i++)
> +      permutedRaw[i] = permutedRaw[i + r.nextInt(raw.length - i)];
> +    for (Vector v : permutedRaw) {
>         points.add(new VectorWritable(v));
>       }
>       ClusteringTestUtils.writePointsToFile(points,
> @@ -376,10 +384,12 @@ public final class TestMeanShift extends
>           optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
>           optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
>       ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
> -    Path outPart = new Path(output, "clusters-4-final/part-r-00000");
> -    long count = HadoopUtil.countRecords(outPart, conf);
> -    assertEquals("count", 3, count);
> -    outPart = new Path(output, "clusters-0/part-m-00000");
> +    FileStatus[] outParts = FileSystem.get(conf).globStatus(
> +        new Path(output, "clusters-?-final/part-r-*"));
> +    assertEquals("Wrong number of matching final parts", 1, outParts.length);
> +    long count = HadoopUtil.countRecords(outParts[0].getPath(), conf);
> +    assertEquals("count", 5, count);
> +    Path outPart = new Path(output, "clusters-0/part-m-00000");
>       Iterator<?>  iterator = new SequenceFileValueIterator<Writable>(outPart,
>           true, conf);
>       // now test the initial clusters to ensure the type of their centers has
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java Mon Mar 12 18:25:45 2012
> @@ -1,26 +0,0 @@
> -/**
> - * Licensed to the Apache Software Foundation (ASF) under one
> - * or more contributor license agreements. See the NOTICE file
> - * distributed with this work for additional information
> - * regarding copyright ownership. The ASF licenses this file
> - * to you under the Apache License, Version 2.0 (the
> - * "License"); you may not use this file except in compliance
> - * with the License. You may obtain a copy of the License at
> - *
> - * http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing,
> - * software distributed under the License is distributed on an
> - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> - * KIND, either express or implied. See the License for the
> - * specific language governing permissions and limitations
> - * under the License.
> - */
> -
> -package org.apache.mahout.common;
> -
> -import org.apache.hadoop.mapreduce.Counter;
> -
> -final class DummyCounter extends Counter {
> -
> -}
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Mar 12 18:25:45 2012
> @@ -17,16 +17,21 @@
>
>   package org.apache.mahout.common;
>
> +import com.google.common.collect.Lists;
> +
>   import java.io.IOException;
> +import java.lang.reflect.Constructor;
> +import java.lang.reflect.Method;
>   import java.util.List;
>   import java.util.Map;
>   import java.util.Set;
>   import java.util.TreeMap;
>
> -import com.google.common.collect.Lists;
>   import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.mapreduce.MapContext;
>   import org.apache.hadoop.mapreduce.Mapper;
>   import org.apache.hadoop.mapreduce.RecordWriter;
> +import org.apache.hadoop.mapreduce.ReduceContext;
>   import org.apache.hadoop.mapreduce.Reducer;
>   import org.apache.hadoop.mapreduce.TaskAttemptContext;
>   import org.apache.hadoop.mapreduce.TaskAttemptID;
> @@ -65,7 +70,18 @@ public final class DummyRecordWriter<K,
>                                                                         Configuration configuration,
>                                                                         RecordWriter<K2, V2>  output)
>       throws IOException, InterruptedException {
> -    return mapper.new Context(configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
> +
> +    // Use reflection since the context types changed incompatibly between 0.20
> +    // and 0.23.
> +    try {
> +      return buildNewMapperContext(configuration, output);
> +    } catch (Exception e) {
> +      try {
> +        return buildOldMapperContext(mapper, configuration, output);
> +      } catch (Exception ex) {
> +        throw new IllegalStateException(ex);
> +      }
> +    }
>     }
>
>     public static<K1, V1, K2, V2>  Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2>  reducer,
> @@ -74,17 +90,96 @@ public final class DummyRecordWriter<K,
>                                                                          Class<K1>  keyClass,
>                                                                          Class<V1>  valueClass)
>       throws IOException, InterruptedException {
> -    return reducer.new Context(configuration,
> -                               new TaskAttemptID(),
> -                               new MockIterator(),
> -                               null,
> -                               null,
> -                               output,
> -                               null,
> -                               new DummyStatusReporter(),
> -                               null,
> -                               keyClass,
> -                               valueClass);
> +
> +    // Use reflection since the context types changed incompatibly between 0.20
> +    // and 0.23.
> +    try {
> +      return buildNewReducerContext(configuration, output, keyClass, valueClass);
> +    } catch (Exception e) {
> +      try {
> +        return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
> +      } catch (Exception ex) {
> +        throw new IllegalStateException(ex);
> +      }
> +    }
> +  }
> +
> +  /**
> +   * Builds a mapper context from classes that are looked up by name at runtime:
> +   * a {@code MapContextImpl} is created through the first public constructor
> +   * reflection returns, then wrapped via {@code WrappedMapper.getMapContext}.
> +   *
> +   * @param configuration passed as the first constructor argument
> +   * @param output        record writer passed as the fourth constructor argument
> +   * @return the context produced by the wrapper
> +   * @throws Exception if any class or method lookup, instantiation or invocation fails
> +   */
> +  @SuppressWarnings({ "unchecked", "rawtypes" })
> +  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
> +      Configuration configuration, RecordWriter<K2, V2> output) throws Exception {
> +    Constructor<?> implConstructor =
> +        Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructors()[0];
> +    Object[] implArgs = {
> +      configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null
> +    };
> +    Object mapContextImpl = implConstructor.newInstance(implArgs);
> +
> +    Class<?> wrapperType = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
> +    Object wrapper = wrapperType.newInstance();
> +    return (Mapper.Context) wrapperType
> +        .getMethod("getMapContext", MapContext.class)
> +        .invoke(wrapper, mapContextImpl);
> +  }
> +
> +  /**
> +   * Builds a mapper context by reflectively invoking the constructor of the
> +   * public nested class named {@code Context} found on the mapper's class.
> +   *
> +   * @param mapper        supplied as the enclosing instance (first constructor argument)
> +   * @param configuration passed to the context constructor
> +   * @param output        record writer passed to the context constructor
> +   * @return the newly constructed context
> +   * @throws Exception if the constructor cannot be found or invoked
> +   */
> +  @SuppressWarnings({ "unchecked", "rawtypes" })
> +  private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
> +      Mapper<K1, V1, K2, V2> mapper, Configuration configuration,
> +      RecordWriter<K2, V2> output) throws Exception {
> +    Constructor<?> contextConstructor = getNestedContextConstructor(mapper.getClass());
> +    // an inner-class constructor takes its enclosing instance as the leading argument
> +    Object[] constructorArgs = {
> +      mapper, configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null
> +    };
> +    return (Mapper.Context) contextConstructor.newInstance(constructorArgs);
> +  }
> +
> +  /**
> +   * Builds a reducer context from classes that are looked up by name at runtime:
> +   * a {@code ReduceContextImpl} is created through the first public constructor
> +   * reflection returns, then wrapped via {@code WrappedReducer.getReducerContext}.
> +   *
> +   * @param configuration passed as the first constructor argument
> +   * @param output        record writer passed to the constructor
> +   * @param keyClass      key class passed to the constructor
> +   * @param valueClass    value class passed to the constructor
> +   * @return the context produced by the wrapper
> +   * @throws Exception if any class or method lookup, instantiation or invocation fails
> +   */
> +  @SuppressWarnings({ "unchecked", "rawtypes" })
> +  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
> +      Configuration configuration, RecordWriter<K2, V2> output, Class<K1> keyClass,
> +      Class<V1> valueClass) throws Exception {
> +    Constructor<?> implConstructor =
> +        Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl").getConstructors()[0];
> +    Object[] implArgs = {
> +      configuration,
> +      new TaskAttemptID(),
> +      new MockIterator(),
> +      null,
> +      null,
> +      output,
> +      null,
> +      new DummyStatusReporter(),
> +      null,
> +      keyClass,
> +      valueClass
> +    };
> +    Object reduceContextImpl = implConstructor.newInstance(implArgs);
> +
> +    Class<?> wrapperType = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
> +    Object wrapper = wrapperType.newInstance();
> +    return (Reducer.Context) wrapperType
> +        .getMethod("getReducerContext", ReduceContext.class)
> +        .invoke(wrapper, reduceContextImpl);
> +  }
> +
> +  /**
> +   * Builds a reducer context by reflectively invoking the constructor of the
> +   * public nested class named {@code Context} found on the reducer's class.
> +   *
> +   * @param reducer       supplied as the enclosing instance (first constructor argument)
> +   * @param configuration passed to the context constructor
> +   * @param output        record writer passed to the context constructor
> +   * @param keyClass      key class passed to the context constructor
> +   * @param valueClass    value class passed to the context constructor
> +   * @return the newly constructed context
> +   * @throws Exception if the constructor cannot be found or invoked
> +   */
> +  @SuppressWarnings({ "unchecked", "rawtypes" })
> +  private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
> +      Reducer<K1, V1, K2, V2> reducer, Configuration configuration,
> +      RecordWriter<K2, V2> output, Class<K1> keyClass,
> +      Class<V1> valueClass) throws Exception {
> +    Constructor<?> contextConstructor = getNestedContextConstructor(reducer.getClass());
> +    // an inner-class constructor takes its enclosing instance as the leading argument
> +    Object[] constructorArgs = {
> +      reducer,
> +      configuration,
> +      new TaskAttemptID(),
> +      new MockIterator(),
> +      null,
> +      null,
> +      output,
> +      null,
> +      new DummyStatusReporter(),
> +      null,
> +      keyClass,
> +      valueClass
> +    };
> +    return (Reducer.Context) contextConstructor.newInstance(constructorArgs);
> +  }
> +
> +  /**
> +   * Finds the public member class with the simple name {@code Context} among
> +   * those reported by {@link Class#getClasses()} for {@code outerClass} and
> +   * returns the first public constructor reflection lists for it.
> +   *
> +   * @param outerClass the class whose public member classes are searched
> +   * @return the first entry of the matching class's {@code getConstructors()} array
> +   * @throws IllegalStateException if no member class is named {@code Context}
> +   */
> +  private static Constructor<?> getNestedContextConstructor(Class<?> outerClass) {
> +    Class<?>[] memberClasses = outerClass.getClasses();
> +    for (int i = 0; i < memberClasses.length; i++) {
> +      Class<?> member = memberClasses[i];
> +      if (!"Context".equals(member.getSimpleName())) {
> +        continue;
> +      }
> +      return member.getConstructors()[0];
> +    }
> +    throw new IllegalStateException("Cannot find context class for " + outerClass);
>     }
>
>   }
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Mon Mar 12 18:25:45 2012
> @@ -19,6 +19,8 @@
>
>   package org.apache.mahout.common;
>
> +import static org.easymock.EasyMock.createMockBuilder;
> +
>   import java.util.Map;
>
>   import com.google.common.collect.Maps;
> @@ -30,10 +32,21 @@ public final class DummyStatusReporter e
>     private final Map<Enum<?>, Counter>  counters = Maps.newHashMap();
>     private final Map<String, Counter>  counterGroups = Maps.newHashMap();
>
> +  /**
> +   * Creates a counter through EasyMock's mock builder. The concrete class is
> +   * {@code GenericCounter} when it can be loaded by name and {@link Counter}
> +   * itself otherwise.
> +   *
> +   * @return a new mock-builder-created counter
> +   */
> +  private Counter newCounter() {
> +    Class<?> counterType;
> +    try {
> +      // 0.23 case
> +      counterType = Class.forName("org.apache.hadoop.mapreduce.counters.GenericCounter");
> +    } catch (ClassNotFoundException e) {
> +      // 0.20 case
> +      counterType = Counter.class;
> +    }
> +    return (Counter) createMockBuilder(counterType).createMock();
> +  }
> +
>     @Override
>     public Counter getCounter(Enum<?>  name) {
>       if (!counters.containsKey(name)) {
> -      counters.put(name, new DummyCounter());
> +      counters.put(name, newCounter());
>       }
>       return counters.get(name);
>     }
> @@ -42,7 +55,7 @@ public final class DummyStatusReporter e
>     @Override
>     public Counter getCounter(String group, String name) {
>       if (!counterGroups.containsKey(group + name)) {
> -      counterGroups.put(group + name, new DummyCounter());
> +      counterGroups.put(group + name, newCounter());
>       }
>       return counterGroups.get(group+name);
>     }
> @@ -55,4 +68,8 @@ public final class DummyStatusReporter e
>     public void setStatus(String status) {
>     }
>
> +  /**
> +   * Always reports zero progress.
> +   * NOTE(review): there is no {@code @Override} here -- presumably because the
> +   * method only exists on the 0.23 StatusReporter and the annotation would not
> +   * compile against 0.20; confirm before adding one.
> +   *
> +   * @return {@code 0}
> +   */
> +  public float getProgress() {
> +    return 0.0f;
> +  }
> +
>   }
>
> Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original)
> +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Mon Mar 12 18:25:45 2012
> @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileStatus;
>   import org.apache.hadoop.fs.FileSystem;
>   import org.apache.hadoop.fs.Path;
>   import org.apache.mahout.clustering.ClusteringTestUtils;
> +import org.apache.mahout.common.HadoopUtil;
>   import org.apache.mahout.common.MahoutTestCase;
>   import org.apache.mahout.common.iterator.sequencefile.PathFilters;
>   import org.apache.mahout.math.DenseVector;
> @@ -254,14 +255,14 @@ public final class TestDistributedRowMat
>
>       deleteContentsOfPath(conf, outputPath);
>
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       Vector result1 = dm.times(v);
>
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       deleteContentsOfPath(conf, outputPath);
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
>       dm.setConf(conf);
> @@ -291,14 +292,14 @@ public final class TestDistributedRowMat
>
>       deleteContentsOfPath(conf, outputPath);
>
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       Vector result1 = dm.timesSquared(v);
>
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       deleteContentsOfPath(conf, outputPath);
> -    assertEquals(0, fs.listStatus(outputPath).length);
> +    assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
>
>       conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
>       dm.setConf(conf);
> @@ -325,7 +326,7 @@ public final class TestDistributedRowMat
>     private static void deleteContentsOfPath(Configuration conf, Path path) throws Exception {
>       FileSystem fs = path.getFileSystem(conf);
>
> -    FileStatus[] statuses = fs.listStatus(path);
> +    FileStatus[] statuses = HadoopUtil.listStatus(fs, path);
>       for (FileStatus status : statuses) {
>         fs.delete(status.getPath(), true);
>       }
>
> Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
> URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
> +++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Mon Mar 12 18:25:45 2012
> @@ -193,7 +193,7 @@ public final class TestClusterDumper ext
>           output, measure, 8, 4, true, 0.0, true);
>       // run ClusterDumper
>       ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
> -        "clusters-0"), new Path(output, "clusteredPoints"));
> +        "clusters-0-final"), new Path(output, "clusteredPoints"));
>       clusterDumper.printClusters(termDictionary);
>     }
>
>
> Modified: mahout/trunk/pom.xml
> URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
> ==============================================================================
> --- mahout/trunk/pom.xml (original)
> +++ mahout/trunk/pom.xml Mon Mar 12 18:25:45 2012
> @@ -107,6 +107,17 @@
>       <url>https://issues.apache.org/jira/browse/MAHOUT</url>
>     </issueManagement>
>
> +<repositories>
> +<repository>
> +<id>apache.snapshots</id>
> +<name>Apache Snapshot Repository</name>
> +<url>http://repository.apache.org/snapshots</url>
> +<releases>
> +<enabled>false</enabled>
> +</releases>
> +</repository>
> +</repositories>
> +
>     <dependencyManagement>
>       <dependencies>
>
> @@ -264,6 +275,100 @@
>           </exclusions>
>         </dependency>
>         <dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-common</artifactId>
> +<version>${hadoop.version}</version>
> +<exclusions>
> +<exclusion>
> +<groupId>net.sf.kosmosfs</groupId>
> +<artifactId>kfs</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.mortbay.jetty</groupId>
> +<artifactId>jetty</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.mortbay.jetty</groupId>
> +<artifactId>jetty-util</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>hsqldb</groupId>
> +<artifactId>hsqldb</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>commons-el</groupId>
> +<artifactId>commons-el</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>junit</groupId>
> +<artifactId>junit</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>oro</groupId>
> +<artifactId>oro</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.mortbay.jetty</groupId>
> +<artifactId>jsp-2.1</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.mortbay.jetty</groupId>
> +<artifactId>jsp-api-2.1</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.mortbay.jetty</groupId>
> +<artifactId>servlet-api-2.5</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>commons-net</groupId>
> +<artifactId>commons-net</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>tomcat</groupId>
> +<artifactId>jasper-runtime</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>tomcat</groupId>
> +<artifactId>jasper-compiler</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>xmlenc</groupId>
> +<artifactId>xmlenc</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>net.java.dev.jets3t</groupId>
> +<artifactId>jets3t</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.eclipse.jdt</groupId>
> +<artifactId>core</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.slf4j</groupId>
> +<artifactId>slf4j-api</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.slf4j</groupId>
> +<artifactId>slf4j-jcl</artifactId>
> +</exclusion>
> +<exclusion>
> +<groupId>org.slf4j</groupId>
> +<artifactId>slf4j-log4j12</artifactId>
> +</exclusion>
> +</exclusions>
> +</dependency>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-mapreduce-client-core</artifactId>
> +<version>${hadoop.version}</version>
> +</dependency>
> +<dependency>
> +<groupId>org.apache.hadoop</groupId>
> +<artifactId>hadoop-mapreduce-client-common</artifactId>
> +<version>${hadoop.version}</version>
> +</dependency>
> +
> +<dependency>
>           <groupId>org.codehaus.jackson</groupId>
>           <artifactId>jackson-core-asl</artifactId>
>           <version>1.8.2</version>
>
>