Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/08 01:21:00 UTC
svn commit: r961531 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/test/java/org/apache/mahout/clustering/
core/src/test/java/org/apache/mahout/clustering/dirichlet/
core/src/test/java/org/apache/mahout/clusteri...
Author: jeastman
Date: Wed Jul 7 23:21:00 2010
New Revision: 961531
URL: http://svn.apache.org/viewvc?rev=961531&view=rev
Log:
MAHOUT-167: Removed MockMapperContext and MockReducerContext, superseded by DummyRecordWriter. Fixed affected unit tests. All tests run.
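For context, the DummyRecordWriter that replaces the mock contexts is used two ways in the diffs below: as an in-memory collector that the test assertions read back through getKeys()/getValue()/getData(), and as the home of static build(...) factories that wrap a mapper or reducer in a mock Context. The class below is a hypothetical sketch of the collector half only, inferred from those call sites; it is not the committed Mahout source, and the build(...) factories are omitted because constructing Hadoop Context objects is version-specific.

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    /** Hypothetical stand-in for the collector half of DummyRecordWriter. */
    public class DummyRecordWriterSketch<K, V> extends RecordWriter<K, V> {

      // every (key, value) pair emitted through the mock Context lands here
      private final Map<K, List<V>> data = new LinkedHashMap<K, List<V>>();

      @Override
      public void write(K key, V value) {
        List<V> values = data.get(key);
        if (values == null) {
          values = new ArrayList<V>();
          data.put(key, values);
        }
        values.add(value);
      }

      @Override
      public void close(TaskAttemptContext context) {
        // in-memory only; nothing to flush
      }

      public Map<K, List<V>> getData() {
        return data;
      }

      public Set<K> getKeys() {
        return data.keySet();
      }

      public List<V> getValue(K key) {
        return data.get(key);
      }
    }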
Removed:
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockMapperContext.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockReducerContext.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java Wed Jul 7 23:21:00 2010
@@ -19,8 +19,6 @@ package org.apache.mahout.clustering.dir
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
-import java.util.Iterator;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.dirichlet.models.Model;
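Every test change below follows the same pattern, consolidated here as a sketch. The build(...) signatures are inferred from the call sites in these diffs, not from the DummyRecordWriter source; state, conf, and sampleData are the existing TestMapReduce fixtures, and the imports are the ones added in the next hunk.

    // Map with a mock Mapper.Context backed by an in-memory writer ...
    DirichletMapper mapper = new DirichletMapper();
    mapper.setup(state);
    DummyRecordWriter<Text, VectorWritable> mapWriter = new DummyRecordWriter<Text, VectorWritable>();
    Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context mapContext =
        DummyRecordWriter.build(mapper, conf, mapWriter);
    for (VectorWritable v : sampleData) {
      mapper.map(null, v, mapContext);
    }

    // ... then reduce the buffered map output. Note the reducer overload of
    // build(...) also takes the reducer's *input* key and value classes
    // (Text.class, VectorWritable.class here), which the mock Reducer.Context
    // presumably needs in order to materialize the grouped values.
    DirichletReducer reducer = new DirichletReducer();
    reducer.setup(state);
    DummyRecordWriter<Text, DirichletCluster<VectorWritable>> reduceWriter =
        new DummyRecordWriter<Text, DirichletCluster<VectorWritable>>();
    Reducer<Text, VectorWritable, Text, DirichletCluster<VectorWritable>>.Context reduceContext =
        DummyRecordWriter.build(reducer, conf, reduceWriter, Text.class, VectorWritable.class);
    for (Text key : mapWriter.getKeys()) {
      reducer.reduce(new Text(key), mapWriter.getValue(key), reduceContext);
    }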
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java Wed Jul 7 23:21:00 2010
@@ -27,16 +27,17 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
-import org.apache.mahout.clustering.MockMapperContext;
-import org.apache.mahout.clustering.MockReducerContext;
import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalModel;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.NormalModel;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
import org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution;
import org.apache.mahout.clustering.dirichlet.models.SampledNormalModel;
-import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseVector;
@@ -114,13 +115,16 @@ public class TestMapReduce extends Mahou
/** Test the basic Mapper */
public void testMapper() throws Exception {
generateSamples(10, 0, 0, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new NormalModelDistribution(new VectorWritable(
- new DenseVector(2))), 5, 1);
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new NormalModelDistribution(new VectorWritable(new DenseVector(2))),
+ 5,
+ 1);
DirichletMapper mapper = new DirichletMapper();
mapper.setup(state);
- DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
- MockMapperContext<Text, VectorWritable> context = new MockMapperContext<Text, VectorWritable>(mapper, conf, collector);
+ DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context = DummyRecordWriter.build(mapper,
+ conf,
+ writer);
for (VectorWritable v : sampleData) {
mapper.map(null, v, context);
}
@@ -135,24 +139,27 @@ public class TestMapReduce extends Mahou
generateSamples(100, 2, 0, 1);
generateSamples(100, 0, 2, 1);
generateSamples(100, 2, 2, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(
- new DenseVector(2))), 20, 1);
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))),
+ 20,
+ 1);
DirichletMapper mapper = new DirichletMapper();
mapper.setup(state);
- DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
- MockMapperContext<Text, VectorWritable> mapContext = new MockMapperContext<Text, VectorWritable>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, VectorWritable> mapWriter = new DummyRecordWriter<Text, VectorWritable>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
for (VectorWritable v : sampleData) {
mapper.map(null, v, mapContext);
}
DirichletReducer reducer = new DirichletReducer();
reducer.setup(state);
- DummyOutputCollector<Text, DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
- MockReducerContext<Text, DirichletCluster<VectorWritable>> reduceContext = new MockReducerContext<Text, DirichletCluster<VectorWritable>>(
- reducer, conf, reduceCollector, Text.class, DirichletCluster.class);
- for (Text key : mapCollector.getKeys()) {
- reducer.reduce(new Text(key), mapCollector.getValue(key), reduceContext);
+ DummyRecordWriter<Text, DirichletCluster<VectorWritable>> reduceWriter = new DummyRecordWriter<Text, DirichletCluster<VectorWritable>>();
+ Reducer<Text, VectorWritable, Text, DirichletCluster<VectorWritable>>.Context reduceContext = DummyRecordWriter
+ .build(reducer, conf, reduceWriter, Text.class, VectorWritable.class);
+ for (Text key : mapWriter.getKeys()) {
+ reducer.reduce(new Text(key), mapWriter.getValue(key), reduceContext);
}
Model<VectorWritable>[] newModels = reducer.getNewModels();
@@ -180,27 +187,30 @@ public class TestMapReduce extends Mahou
generateSamples(100, 2, 0, 1);
generateSamples(100, 0, 2, 1);
generateSamples(100, 2, 2, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(
- new DenseVector(2))), 20, 1.0);
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))),
+ 20,
+ 1.0);
List<Model<VectorWritable>[]> models = new ArrayList<Model<VectorWritable>[]>();
for (int iteration = 0; iteration < 10; iteration++) {
DirichletMapper mapper = new DirichletMapper();
mapper.setup(state);
- DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
- MockMapperContext<Text, VectorWritable> mapContext = new MockMapperContext<Text, VectorWritable>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, VectorWritable> mapWriter = new DummyRecordWriter<Text, VectorWritable>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
for (VectorWritable v : sampleData) {
mapper.map(null, v, mapContext);
}
DirichletReducer reducer = new DirichletReducer();
reducer.setup(state);
- DummyOutputCollector<Text, DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
- MockReducerContext<Text, DirichletCluster<VectorWritable>> reduceContext = new MockReducerContext<Text, DirichletCluster<VectorWritable>>(
- reducer, conf, reduceCollector, Text.class, DirichletCluster.class);
- for (Text key : mapCollector.getKeys()) {
- reducer.reduce(new Text(key), mapCollector.getValue(key), reduceContext);
+ DummyRecordWriter<Text, DirichletCluster<VectorWritable>> reduceWriter = new DummyRecordWriter<Text, DirichletCluster<VectorWritable>>();
+ Reducer<Text, VectorWritable, Text, DirichletCluster<VectorWritable>>.Context reduceContext = DummyRecordWriter
+ .build(reducer, conf, reduceWriter, Text.class, VectorWritable.class);
+ for (Text key : mapWriter.getKeys()) {
+ reducer.reduce(new Text(key), mapWriter.getValue(key), reduceContext);
}
Model<VectorWritable>[] newModels = reducer.getNewModels();
@@ -219,9 +229,17 @@ public class TestMapReduce extends Mahou
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
// Now run the driver
int maxIterations = 5;
- DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"),
- "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", "org.apache.mahout.math.DenseVector", 20,
- maxIterations, 1.0, 1, false, true, 0);
+ DirichletDriver.runJob(getTestTempDirPath("input"),
+ getTestTempDirPath("output"),
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
+ "org.apache.mahout.math.DenseVector",
+ 20,
+ maxIterations,
+ 1.0,
+ 1,
+ false,
+ true,
+ 0);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
Configuration conf = new Configuration();
@@ -258,9 +276,17 @@ public class TestMapReduce extends Mahou
generate4Datasets();
// Now run the driver
int maxIterations = 3;
- DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"),
- "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", "org.apache.mahout.math.DenseVector", 20,
- maxIterations, 1.0, 1, false, true, 0);
+ DirichletDriver.runJob(getTestTempDirPath("input"),
+ getTestTempDirPath("output"),
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
+ "org.apache.mahout.math.DenseVector",
+ 20,
+ maxIterations,
+ 1.0,
+ 1,
+ false,
+ true,
+ 0);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
Configuration conf = new Configuration();
@@ -295,9 +321,17 @@ public class TestMapReduce extends Mahou
generate4Datasets();
// Now run the driver
int maxIterations = 3;
- DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"),
- "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", "org.apache.mahout.math.DenseVector", 20, maxIterations,
- 1.0, 2, false, true, 0);
+ DirichletDriver.runJob(getTestTempDirPath("input"),
+ getTestTempDirPath("output"),
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
+ "org.apache.mahout.math.DenseVector",
+ 20,
+ maxIterations,
+ 1.0,
+ 2,
+ false,
+ true,
+ 0);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
Configuration conf = new Configuration();
@@ -332,9 +366,17 @@ public class TestMapReduce extends Mahou
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data4.txt"), fs, conf);
// Now run the driver
int maxIterations = 3;
- DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"),
- "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution", "org.apache.mahout.math.DenseVector",
- 20, maxIterations, 1.0, 2, false, true, 0);
+ DirichletDriver.runJob(getTestTempDirPath("input"),
+ getTestTempDirPath("output"),
+ "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution",
+ "org.apache.mahout.math.DenseVector",
+ 20,
+ maxIterations,
+ 1.0,
+ 2,
+ false,
+ true,
+ 0);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
Configuration conf = new Configuration();
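The runJob reformatting above is purely cosmetic, so for readability here is the first such call with each positional argument annotated. The labels are editorial guesses from how the tests use the values, not DirichletDriver's declared parameter names; the trailing true and 0 plausibly correspond to the emit-most-likely flag and threshold that the fuzzy k-means tests below configure explicitly.

    // The first driver call in this file, with argument roles annotated.
    // Comment labels are guesses inferred from the surrounding tests.
    DirichletDriver.runJob(getTestTempDirPath("input"),   // input directory
        getTestTempDirPath("output"),                     // output directory
        "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", // model distribution class name
        "org.apache.mahout.math.DenseVector",             // model prototype class name
        20,             // number of models, mirroring the 20 used for DirichletState above
        maxIterations,  // iteration cap (5 in this test, 3 in the later ones)
        1.0,            // alpha_0, mirroring the 1.0 used for DirichletState above
        1,              // apparently the reducer count; two later tests pass 2 here
        false,          // boolean flag; its role is not evident from this diff
        true,           // plausibly emit-most-likely, cf. EMIT_MOST_LIKELY_KEY below
        0);             // plausibly a threshold, cf. THRESHOLD_KEY below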
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Wed Jul 7 23:21:00 2010
@@ -29,12 +29,13 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
-import org.apache.mahout.clustering.MockMapperContext;
-import org.apache.mahout.clustering.MockReducerContext;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
-import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -87,8 +88,10 @@ public class TestFuzzyKmeansClustering e
}
- private static void computeCluster(List<Vector> points, List<SoftCluster> clusterList, FuzzyKMeansClusterer clusterer,
- Map<Integer, List<WeightedVectorWritable>> pointClusterInfo) {
+ private static void computeCluster(List<Vector> points,
+ List<SoftCluster> clusterList,
+ FuzzyKMeansClusterer clusterer,
+ Map<Integer, List<WeightedVectorWritable>> pointClusterInfo) {
for (Vector point : points) {
// calculate point distances for all clusters
@@ -155,10 +158,16 @@ public class TestFuzzyKmeansClustering e
}
Map<Integer, List<WeightedVectorWritable>> pointClusterInfo = new HashMap<Integer, List<WeightedVectorWritable>>();
// run reference FuzzyKmeans algorithm
- List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(points, clusterList, new EuclideanDistanceMeasure(),
- 0.001, 2, 2);
- computeCluster(points, clusters.get(clusters.size() - 1), new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001, 2),
- pointClusterInfo);
+ List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(points,
+ clusterList,
+ new EuclideanDistanceMeasure(),
+ 0.001,
+ 2,
+ 2);
+ computeCluster(points,
+ clusters.get(clusters.size() - 1),
+ new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001, 2),
+ pointClusterInfo);
// iterate for each cluster
int size = 0;
@@ -180,8 +189,11 @@ public class TestFuzzyKmeansClustering e
for (int k = 0; k < points.size(); k++) {
System.out.println("testKFuzzyKMeansMRJob k= " + k);
// pick k initial cluster centers at random
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(clustersPath, "part-00000"), Text.class,
- SoftCluster.class);
+ SequenceFile.Writer writer = new SequenceFile.Writer(fs,
+ conf,
+ new Path(clustersPath, "part-00000"),
+ Text.class,
+ SoftCluster.class);
for (int i = 0; i < k + 1; i++) {
Vector vec = tweakValue(points.get(i).get());
@@ -198,8 +210,18 @@ public class TestFuzzyKmeansClustering e
// now run the Job
Path output = getTestTempDirPath("output");
- FuzzyKMeansDriver.runJob(pointsPath, clustersPath, output, EuclideanDistanceMeasure.class.getName(), 0.001, 2, 1, k + 1, 2,
- false, true, 0);
+ FuzzyKMeansDriver.runJob(pointsPath,
+ clustersPath,
+ output,
+ EuclideanDistanceMeasure.class.getName(),
+ 0.001,
+ 2,
+ 1,
+ k + 1,
+ 2,
+ false,
+ true,
+ 0);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(output, "clusteredPoints/part-m-00000"), conf);
IntWritable key = new IntWritable();
@@ -240,21 +262,23 @@ public class TestFuzzyKmeansClustering e
conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
- DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockMapperContext<Text, FuzzyKMeansInfo> mapContext = new MockMapperContext<Text, FuzzyKMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> mapWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
for (VectorWritable point : points) {
mapper.map(new Text(), point, mapContext);
}
// now verify mapper output
- assertEquals("Mapper Keys", k + 1, mapCollector.getData().size());
+ assertEquals("Mapper Keys", k + 1, mapWriter.getData().size());
Map<Vector, Double> pointTotalProbMap = new HashMap<Vector, Double>();
- for (Text key : mapCollector.getKeys()) {
+ for (Text key : mapWriter.getKeys()) {
// SoftCluster cluster = SoftCluster.decodeCluster(key);
- List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
+ List<FuzzyKMeansInfo> values = mapWriter.getValue(key);
for (FuzzyKMeansInfo value : values) {
Double val = pointTotalProbMap.get(value.getVector());
@@ -301,8 +325,10 @@ public class TestFuzzyKmeansClustering e
conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
- DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockMapperContext<Text, FuzzyKMeansInfo> mapContext = new MockMapperContext<Text, FuzzyKMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> mapWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
for (VectorWritable point : points) {
mapper.map(new Text(), point, mapContext);
@@ -310,20 +336,20 @@ public class TestFuzzyKmeansClustering e
// run combiner
FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
- DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockReducerContext<Text, FuzzyKMeansInfo> combinerContext = new MockReducerContext<Text, FuzzyKMeansInfo>(combiner, conf,
- combinerCollector, Text.class, FuzzyKMeansInfo.class);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> combinerWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Reducer<Text, FuzzyKMeansInfo, Text, FuzzyKMeansInfo>.Context combinerContext = DummyRecordWriter
+ .build(combiner, conf, combinerWriter, Text.class, FuzzyKMeansInfo.class);
combiner.setup(combinerContext);
- for (Text key : mapCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
+ for (Text key : mapWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = mapWriter.getValue(key);
combiner.reduce(new Text(key), values, combinerContext);
}
// now verify the combiner output
- assertEquals("Combiner Output", k + 1, combinerCollector.getData().size());
+ assertEquals("Combiner Output", k + 1, combinerWriter.getData().size());
- for (Text key : combinerCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = combinerCollector.getValue(key);
+ for (Text key : combinerWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = combinerWriter.getValue(key);
assertEquals("too many values", 1, values.size());
}
}
@@ -356,8 +382,10 @@ public class TestFuzzyKmeansClustering e
conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
- DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockMapperContext<Text, FuzzyKMeansInfo> mapContext = new MockMapperContext<Text, FuzzyKMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> mapWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
for (VectorWritable point : points) {
mapper.map(new Text(), point, mapContext);
@@ -365,29 +393,32 @@ public class TestFuzzyKmeansClustering e
// run combiner
FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
- DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockReducerContext<Text, FuzzyKMeansInfo> combinerContext = new MockReducerContext<Text, FuzzyKMeansInfo>(combiner, conf,
- combinerCollector, Text.class, FuzzyKMeansInfo.class);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> combinerWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Reducer<Text, FuzzyKMeansInfo, Text, FuzzyKMeansInfo>.Context combinerContext = DummyRecordWriter
+ .build(combiner, conf, combinerWriter, Text.class, FuzzyKMeansInfo.class);
combiner.setup(combinerContext);
- for (Text key : mapCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
+ for (Text key : mapWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = mapWriter.getValue(key);
combiner.reduce(new Text(key), values, combinerContext);
}
// run reducer
FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
- DummyOutputCollector<Text, SoftCluster> reducerCollector = new DummyOutputCollector<Text, SoftCluster>();
- MockReducerContext<Text, SoftCluster> reducerContext = new MockReducerContext<Text, SoftCluster>(reducer, conf,
- reducerCollector, Text.class, SoftCluster.class);
+ DummyRecordWriter<Text, SoftCluster> reducerWriter = new DummyRecordWriter<Text, SoftCluster>();
+ Reducer<Text, FuzzyKMeansInfo, Text, SoftCluster>.Context reducerContext = DummyRecordWriter.build(reducer,
+ conf,
+ reducerWriter,
+ Text.class,
+ FuzzyKMeansInfo.class);
reducer.setup(clusterList, conf);
- for (Text key : combinerCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = combinerCollector.getValue(key);
+ for (Text key : combinerWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = combinerWriter.getValue(key);
reducer.reduce(new Text(key), values, reducerContext);
}
// now verify the reducer output
- assertEquals("Reducer Output", k + 1, combinerCollector.getData().size());
+ assertEquals("Reducer Output", k + 1, combinerWriter.getData().size());
// compute the reference result after one iteration and compare
List<SoftCluster> reference = new ArrayList<SoftCluster>();
@@ -405,7 +436,7 @@ public class TestFuzzyKmeansClustering e
for (SoftCluster key : reference) {
String clusterId = key.getIdentifier();
- List<SoftCluster> values = reducerCollector.getValue(new Text(clusterId));
+ List<SoftCluster> values = reducerWriter.getValue(new Text(clusterId));
SoftCluster cluster = values.get(0);
System.out.println("ref= " + key.toString() + " cluster= " + cluster.toString());
cluster.recomputeCenter();
@@ -446,8 +477,10 @@ public class TestFuzzyKmeansClustering e
conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
- DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockMapperContext<Text, FuzzyKMeansInfo> mapContext = new MockMapperContext<Text, FuzzyKMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> mapWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
for (VectorWritable point : points) {
mapper.map(new Text(), point, mapContext);
@@ -455,31 +488,34 @@ public class TestFuzzyKmeansClustering e
// run combiner
FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
- DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
- MockReducerContext<Text, FuzzyKMeansInfo> combinerContext = new MockReducerContext<Text, FuzzyKMeansInfo>(combiner, conf,
- combinerCollector, Text.class, FuzzyKMeansInfo.class);
+ DummyRecordWriter<Text, FuzzyKMeansInfo> combinerWriter = new DummyRecordWriter<Text, FuzzyKMeansInfo>();
+ Reducer<Text, FuzzyKMeansInfo, Text, FuzzyKMeansInfo>.Context combinerContext = DummyRecordWriter
+ .build(combiner, conf, combinerWriter, Text.class, FuzzyKMeansInfo.class);
combiner.setup(combinerContext);
- for (Text key : mapCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
+ for (Text key : mapWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = mapWriter.getValue(key);
combiner.reduce(new Text(key), values, combinerContext);
}
// run reducer
FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
- DummyOutputCollector<Text, SoftCluster> reducerCollector = new DummyOutputCollector<Text, SoftCluster>();
- MockReducerContext<Text, SoftCluster> reducerContext = new MockReducerContext<Text, SoftCluster>(reducer, conf,
- reducerCollector, Text.class, SoftCluster.class);
+ DummyRecordWriter<Text, SoftCluster> reducerWriter = new DummyRecordWriter<Text, SoftCluster>();
+ Reducer<Text, FuzzyKMeansInfo, Text, SoftCluster>.Context reducerContext = DummyRecordWriter.build(reducer,
+ conf,
+ reducerWriter,
+ Text.class,
+ FuzzyKMeansInfo.class);
reducer.setup(clusterList, conf);
- for (Text key : combinerCollector.getKeys()) {
- List<FuzzyKMeansInfo> values = combinerCollector.getValue(key);
+ for (Text key : combinerWriter.getKeys()) {
+ List<FuzzyKMeansInfo> values = combinerWriter.getValue(key);
reducer.reduce(new Text(key), values, reducerContext);
}
// run clusterMapper
List<SoftCluster> reducerClusters = new ArrayList<SoftCluster>();
- for (Text key : reducerCollector.getKeys()) {
- List<SoftCluster> values = reducerCollector.getValue(key);
+ for (Text key : reducerWriter.getKeys()) {
+ List<SoftCluster> values = reducerWriter.getValue(key);
reducerClusters.add(values.get(0));
}
for (SoftCluster softCluster : reducerClusters) {
@@ -487,13 +523,13 @@ public class TestFuzzyKmeansClustering e
}
FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
- DummyOutputCollector<IntWritable, WeightedVectorWritable> clusterMapperCollector = new DummyOutputCollector<IntWritable, WeightedVectorWritable>();
- MockMapperContext<IntWritable, WeightedVectorWritable> clusterMapperContext = new MockMapperContext<IntWritable, WeightedVectorWritable>(
- clusterMapper, conf, clusterMapperCollector);
+ DummyRecordWriter<IntWritable, WeightedVectorWritable> clusterWriter = new DummyRecordWriter<IntWritable, WeightedVectorWritable>();
+ Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context clusterContext = DummyRecordWriter
+ .build(clusterMapper, conf, clusterWriter);
clusterMapper.setup(reducerClusters, conf);
for (VectorWritable point : points) {
- clusterMapper.map(new Text(), point, clusterMapperContext);
+ clusterMapper.map(new Text(), point, clusterContext);
}
// compute the reference result after one iteration and compare
@@ -508,26 +544,28 @@ public class TestFuzzyKmeansClustering e
pointsVectors.add((Vector) point.get());
}
- List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(pointsVectors, reference,
- new EuclideanDistanceMeasure(), 0.001, 2, 1);
+ List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(pointsVectors,
+ reference,
+ new EuclideanDistanceMeasure(),
+ 0.001,
+ 2,
+ 1);
computeCluster(pointsVectors, clusters.get(clusters.size() - 1), new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(),
- 0.001, 2), refClusters);
+ 0.001,
+ 2), refClusters);
// Now compare the clustermapper results with reference implementation
- assertEquals("mapper and reference sizes", refClusters.size(), clusterMapperCollector.getKeys().size());
+ assertEquals("mapper and reference sizes", refClusters.size(), clusterWriter.getKeys().size());
for (Map.Entry<Integer, List<WeightedVectorWritable>> entry : refClusters.entrySet()) {
int key = entry.getKey();
List<WeightedVectorWritable> value = entry.getValue();
- System.out.println("refClusters=" + value + " mapClusters="
- + clusterMapperCollector.getValue(new IntWritable(key)));
- assertEquals("cluster " + key + " sizes",
- value.size(),
- clusterMapperCollector.getValue(new IntWritable(key)).size());
+ System.out.println("refClusters=" + value + " mapClusters=" + clusterWriter.getValue(new IntWritable(key)));
+ assertEquals("cluster " + key + " sizes", value.size(), clusterWriter.getValue(new IntWritable(key)).size());
}
// make sure all points are allocated to a cluster
int size = 0;
- for (List<WeightedVectorWritable> pts: refClusters.values()) {
+ for (List<WeightedVectorWritable> pts : refClusters.values()) {
size += pts.size();
}
assertEquals("total size", size, points.size());
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Wed Jul 7 23:21:00 2010
@@ -29,12 +29,14 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
-import org.apache.mahout.clustering.MockMapperContext;
-import org.apache.mahout.clustering.MockReducerContext;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -153,8 +155,10 @@ public class TestKmeansClustering extend
List<VectorWritable> points = getPointsWritable(reference);
for (int k = 0; k < points.size(); k++) {
// pick k initial cluster centers at random
- DummyOutputCollector<Text, KMeansInfo> collector = new DummyOutputCollector<Text, KMeansInfo>();
- MockMapperContext<Text, KMeansInfo> context = new MockMapperContext<Text, KMeansInfo>(mapper, conf, collector);
+ DummyRecordWriter<Text, KMeansInfo> mapWriter = new DummyRecordWriter<Text, KMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, KMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
List<Cluster> clusters = new ArrayList<Cluster>();
for (int i = 0; i < k + 1; i++) {
@@ -167,15 +171,15 @@ public class TestKmeansClustering extend
// map the data
for (VectorWritable point : points) {
- mapper.map(new Text(), point, context);
+ mapper.map(new Text(), point, mapContext);
}
- assertEquals("Number of map results", k + 1, collector.getData().size());
+ assertEquals("Number of map results", k + 1, mapWriter.getData().size());
// now verify that all points are correctly allocated
EuclideanDistanceMeasure euclideanDistanceMeasure = measure;
Map<String, Cluster> clusterMap = loadClusterMap(clusters);
- for (Text key : collector.getKeys()) {
+ for (Text key : mapWriter.getKeys()) {
Cluster cluster = clusterMap.get(key.toString());
- List<KMeansInfo> values = collector.getValue(key);
+ List<KMeansInfo> values = mapWriter.getValue(key);
for (KMeansInfo value : values) {
double distance = euclideanDistanceMeasure.distance(cluster.getCenter(), value.getPointTotal());
for (Cluster c : clusters) {
@@ -200,8 +204,10 @@ public class TestKmeansClustering extend
List<VectorWritable> points = getPointsWritable(reference);
for (int k = 0; k < points.size(); k++) {
// pick k initial cluster centers at random
- DummyOutputCollector<Text, KMeansInfo> mapCollector = new DummyOutputCollector<Text, KMeansInfo>();
- MockMapperContext<Text, KMeansInfo> mapContext = new MockMapperContext<Text, KMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, KMeansInfo> mapWriter = new DummyRecordWriter<Text, KMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, KMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
List<Cluster> clusters = new ArrayList<Cluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = points.get(i).get();
@@ -218,19 +224,22 @@ public class TestKmeansClustering extend
}
// now combine the data
KMeansCombiner combiner = new KMeansCombiner();
- DummyOutputCollector<Text, KMeansInfo> combineCollector = new DummyOutputCollector<Text, KMeansInfo>();
- MockReducerContext<Text, KMeansInfo> combineContext = new MockReducerContext<Text, KMeansInfo>(combiner, conf, combineCollector,
- Text.class, KMeansInfo.class);
- for (Text key : mapCollector.getKeys()) {
- combiner.reduce(new Text(key), mapCollector.getValue(key), combineContext);
+ DummyRecordWriter<Text, KMeansInfo> combinerWriter = new DummyRecordWriter<Text, KMeansInfo>();
+ Reducer<Text, KMeansInfo, Text, KMeansInfo>.Context combinerContext = DummyRecordWriter.build(combiner,
+ conf,
+ combinerWriter,
+ Text.class,
+ KMeansInfo.class);
+ for (Text key : mapWriter.getKeys()) {
+ combiner.reduce(new Text(key), mapWriter.getValue(key), combinerContext);
}
- assertEquals("Number of map results", k + 1, combineCollector.getData().size());
+ assertEquals("Number of map results", k + 1, combinerWriter.getData().size());
// now verify that all points are accounted for
int count = 0;
Vector total = new DenseVector(2);
- for (Text key : combineCollector.getKeys()) {
- List<KMeansInfo> values = combineCollector.getValue(key);
+ for (Text key : combinerWriter.getKeys()) {
+ List<KMeansInfo> values = combinerWriter.getValue(key);
assertEquals("too many values", 1, values.size());
// String value = values.get(0).toString();
KMeansInfo info = values.get(0);
@@ -259,8 +268,10 @@ public class TestKmeansClustering extend
for (int k = 0; k < points.size(); k++) {
System.out.println("K = " + k);
// pick k initial cluster centers at random
- DummyOutputCollector<Text, KMeansInfo> mapCollector = new DummyOutputCollector<Text, KMeansInfo>();
- MockMapperContext<Text, KMeansInfo> mapContext = new MockMapperContext<Text, KMeansInfo>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, KMeansInfo> mapWriter = new DummyRecordWriter<Text, KMeansInfo>();
+ Mapper<WritableComparable<?>, VectorWritable, Text, KMeansInfo>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
List<Cluster> clusters = new ArrayList<Cluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = points.get(i).get();
@@ -276,24 +287,30 @@ public class TestKmeansClustering extend
}
// now combine the data
KMeansCombiner combiner = new KMeansCombiner();
- DummyOutputCollector<Text, KMeansInfo> combineCollector = new DummyOutputCollector<Text, KMeansInfo>();
- MockReducerContext<Text, KMeansInfo> combineContext = new MockReducerContext<Text, KMeansInfo>(combiner, conf,
- combineCollector, Text.class, KMeansInfo.class);
- for (Text key : mapCollector.getKeys()) {
- combiner.reduce(new Text(key), mapCollector.getValue(key), combineContext);
+ DummyRecordWriter<Text, KMeansInfo> combinerWriter = new DummyRecordWriter<Text, KMeansInfo>();
+ Reducer<Text, KMeansInfo, Text, KMeansInfo>.Context combinerContext = DummyRecordWriter.build(combiner,
+ conf,
+ combinerWriter,
+ Text.class,
+ KMeansInfo.class);
+ for (Text key : mapWriter.getKeys()) {
+ combiner.reduce(new Text(key), mapWriter.getValue(key), combinerContext);
}
// now reduce the data
KMeansReducer reducer = new KMeansReducer();
reducer.setup(clusters, measure);
- DummyOutputCollector<Text, Cluster> reduceCollector = new DummyOutputCollector<Text, Cluster>();
- MockReducerContext<Text, Cluster> reduceContext = new MockReducerContext<Text, Cluster>(reducer, conf, reduceCollector,
- Text.class, Cluster.class);
- for (Text key : combineCollector.getKeys()) {
- reducer.reduce(new Text(key), combineCollector.getValue(key), reduceContext);
+ DummyRecordWriter<Text, Cluster> reducerWriter = new DummyRecordWriter<Text, Cluster>();
+ Reducer<Text, KMeansInfo, Text, Cluster>.Context reducerContext = DummyRecordWriter.build(reducer,
+ conf,
+ reducerWriter,
+ Text.class,
+ KMeansInfo.class);
+ for (Text key : combinerWriter.getKeys()) {
+ reducer.reduce(new Text(key), combinerWriter.getValue(key), reducerContext);
}
- assertEquals("Number of map results", k + 1, reduceCollector.getData().size());
+ assertEquals("Number of map results", k + 1, reducerWriter.getData().size());
// compute the reference result after one iteration and compare
List<Cluster> reference = new ArrayList<Cluster>();
@@ -316,7 +333,7 @@ public class TestKmeansClustering extend
converged = true;
for (Cluster ref : reference) {
String key = ref.getIdentifier();
- List<Cluster> values = reduceCollector.getValue(new Text(key));
+ List<Cluster> values = reducerWriter.getValue(new Text(key));
Cluster cluster = values.get(0);
converged = converged && cluster.isConverged();
// Since we aren't roundtripping through Writable, we need to compare the reference center with the
@@ -399,8 +416,14 @@ public class TestKmeansClustering extend
CanopyDriver.runJob(pointsPath, outputPath, ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, false);
// now run the KMeans job
- KMeansDriver.runJob(pointsPath, new Path(outputPath, "clusters-0"), outputPath, EuclideanDistanceMeasure.class.getName(),
- 0.001, 10, 1, true);
+ KMeansDriver.runJob(pointsPath,
+ new Path(outputPath, "clusters-0"),
+ outputPath,
+ EuclideanDistanceMeasure.class.getName(),
+ 0.001,
+ 10,
+ 1,
+ true);
// now compare the expected clusters with actual
Path clusteredPointsPath = new Path(outputPath, "clusteredPoints");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Wed Jul 7 23:21:00 2010
@@ -27,10 +27,11 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
-import org.apache.mahout.clustering.MockMapperContext;
-import org.apache.mahout.clustering.MockReducerContext;
-import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -161,8 +162,10 @@ public class TestMeanShift extends Mahou
// map the data
MeanShiftCanopyMapper mapper = new MeanShiftCanopyMapper();
- DummyOutputCollector<Text, MeanShiftCanopy> mapCollector = new DummyOutputCollector<Text, MeanShiftCanopy>();
- MockMapperContext<Text, MeanShiftCanopy> mapContext = new MockMapperContext<Text, MeanShiftCanopy>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, MeanShiftCanopy> mapWriter = new DummyRecordWriter<Text, MeanShiftCanopy>();
+ Mapper<WritableComparable<?>, MeanShiftCanopy, Text, MeanShiftCanopy>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
for (MeanShiftCanopy canopy : canopies) {
mapper.map(new Text(), canopy, mapContext);
@@ -170,8 +173,8 @@ public class TestMeanShift extends Mahou
mapper.cleanup(mapContext);
// now verify the output
- assertEquals("Number of map results", 1, mapCollector.getData().size());
- List<MeanShiftCanopy> data = mapCollector.getValue(new Text("canopy"));
+ assertEquals("Number of map results", 1, mapWriter.getData().size());
+ List<MeanShiftCanopy> data = mapWriter.getValue(new Text("canopy"));
assertEquals("Number of canopies", refCanopies.size(), data.size());
// add all points to the reference canopies
@@ -229,8 +232,10 @@ public class TestMeanShift extends Mahou
conf.set(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.5");
MeanShiftCanopyMapper mapper = new MeanShiftCanopyMapper();
- DummyOutputCollector<Text, MeanShiftCanopy> mapCollector = new DummyOutputCollector<Text, MeanShiftCanopy>();
- MockMapperContext<Text, MeanShiftCanopy> mapContext = new MockMapperContext<Text, MeanShiftCanopy>(mapper, conf, mapCollector);
+ DummyRecordWriter<Text, MeanShiftCanopy> mapWriter = new DummyRecordWriter<Text, MeanShiftCanopy>();
+ Mapper<WritableComparable<?>, MeanShiftCanopy, Text, MeanShiftCanopy>.Context mapContext = DummyRecordWriter.build(mapper,
+ conf,
+ mapWriter);
mapper.setup(mapContext);
// map the data
@@ -239,18 +244,21 @@ public class TestMeanShift extends Mahou
}
mapper.cleanup(mapContext);
- assertEquals("Number of map results", 1, mapCollector.getData().size());
+ assertEquals("Number of map results", 1, mapWriter.getData().size());
// now reduce the mapper output
MeanShiftCanopyReducer reducer = new MeanShiftCanopyReducer();
- DummyOutputCollector<Text, MeanShiftCanopy> reduceCollector = new DummyOutputCollector<Text, MeanShiftCanopy>();
- MockReducerContext<Text, MeanShiftCanopy> reduceContext = new MockReducerContext<Text, MeanShiftCanopy>(reducer, conf,
- reduceCollector, Text.class, MeanShiftCanopy.class);
+ DummyRecordWriter<Text, MeanShiftCanopy> reduceWriter = new DummyRecordWriter<Text, MeanShiftCanopy>();
+ Reducer<Text, MeanShiftCanopy, Text, MeanShiftCanopy>.Context reduceContext = DummyRecordWriter.build(reducer,
+ conf,
+ reduceWriter,
+ Text.class,
+ MeanShiftCanopy.class);
reducer.setup(reduceContext);
- reducer.reduce(new Text("canopy"), mapCollector.getValue(new Text("canopy")), reduceContext);
+ reducer.reduce(new Text("canopy"), mapWriter.getValue(new Text("canopy")), reduceContext);
reducer.cleanup(reduceContext);
// now verify the output
- assertEquals("Number of canopies", reducerReference.size(), reduceCollector.getKeys().size());
+ assertEquals("Number of canopies", reducerReference.size(), reduceWriter.getKeys().size());
// add all points to the reference canopy maps
Map<String, MeanShiftCanopy> reducerReferenceMap = new HashMap<String, MeanShiftCanopy>();
@@ -261,7 +269,7 @@ public class TestMeanShift extends Mahou
for (Map.Entry<String, MeanShiftCanopy> mapEntry : reducerReferenceMap.entrySet()) {
MeanShiftCanopy refCanopy = mapEntry.getValue();
- List<MeanShiftCanopy> values = reduceCollector.getValue(new Text((refCanopy.isConverged() ? "V-" : "C-")
+ List<MeanShiftCanopy> values = reduceWriter.getValue(new Text((refCanopy.isConverged() ? "V-" : "C-")
+ refCanopy.getCanopyId()));
assertEquals("values", 1, values.size());
MeanShiftCanopy reducerCanopy = values.get(0);
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Wed Jul 7 23:21:00 2010
@@ -25,8 +25,8 @@ import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
-import org.apache.mahout.clustering.MockReducerContext;
-import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.ga.watchmaker.cd.CDFitness;
@@ -64,19 +64,23 @@ public class CDReducerTest extends Mahou
public void testReduce() throws IOException, InterruptedException {
CDReducer reducer = new CDReducer();
Configuration conf = new Configuration();
- DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>();
- MockReducerContext<LongWritable, CDFitness> context = new MockReducerContext<LongWritable, CDFitness>(reducer, conf, collector,
- LongWritable.class, CDFitness.class);
+ DummyRecordWriter<LongWritable, CDFitness> reduceWriter = new DummyRecordWriter<LongWritable, CDFitness>();
+ Reducer<LongWritable, CDFitness, LongWritable, CDFitness>.Context reduceContext = DummyRecordWriter.build(reducer,
+ conf,
+ reduceWriter,
+ LongWritable.class,
+ CDFitness.class);
+
LongWritable zero = new LongWritable(0);
- reducer.reduce(zero, evaluations, context);
+ reducer.reduce(zero, evaluations, reduceContext);
// check if the expectations are met
- Set<LongWritable> keys = collector.getKeys();
+ Set<LongWritable> keys = reduceWriter.getKeys();
assertEquals("nb keys", 1, keys.size());
assertTrue("bad key", keys.contains(zero));
- assertEquals("nb values", 1, collector.getValue(zero).size());
- CDFitness fitness = collector.getValue(zero).get(0);
+ assertEquals("nb values", 1, reduceWriter.getValue(zero).size());
+ CDFitness fitness = reduceWriter.getValue(zero).get(0);
assertEquals(expected, fitness);
}
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java?rev=961531&r1=961530&r2=961531&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java Wed Jul 7 23:21:00 2010
@@ -22,8 +22,8 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.mahout.clustering.MockMapperContext;
-import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
public class ToolMapperTest extends MahoutTestCase {
@@ -33,8 +33,8 @@ public class ToolMapperTest extends Maho
Text value = new Text();
Configuration conf = new Configuration();
ToolMapper mapper = new ToolMapper();
- DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
- MockMapperContext<LongWritable, Text> context = new MockMapperContext<LongWritable, Text>(mapper, conf, output);
+ DummyRecordWriter<LongWritable, Text> writer = new DummyRecordWriter<LongWritable, Text>();
+ Mapper<LongWritable, Text, LongWritable, Text>.Context context = DummyRecordWriter.build(mapper, conf, writer);
// no attribute is ignored
char[] descriptors = { 'N', 'N', 'C', 'C', 'N', 'N' };
@@ -45,7 +45,7 @@ public class ToolMapperTest extends Maho
mapper.map(key, value, context);
for (int index = 0; index < 6; index++) {
- List<Text> values = output.getValue(new LongWritable(index));
+ List<Text> values = writer.getValue(new LongWritable(index));
assertEquals("should extract one value per attribute", 1, values.size());
assertEquals("Bad extracted value", "A" + (index + 1), values.get(0).toString());
}
@@ -56,8 +56,8 @@ public class ToolMapperTest extends Maho
Text value = new Text();
ToolMapper mapper = new ToolMapper();
Configuration conf = new Configuration();
- DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
- MockMapperContext<LongWritable, Text> context = new MockMapperContext<LongWritable, Text>(mapper, conf, output);
+ DummyRecordWriter<LongWritable, Text> writer = new DummyRecordWriter<LongWritable, Text>();
+ Mapper<LongWritable, Text, LongWritable, Text>.Context context = DummyRecordWriter.build(mapper, conf, writer);
// no attribute is ignored
char[] descriptors = { 'N', 'I', 'C', 'I', 'I', 'N' };
@@ -68,7 +68,7 @@ public class ToolMapperTest extends Maho
mapper.map(key, value, context);
for (int index = 0; index < 6; index++) {
- List<Text> values = output.getValue(new LongWritable(index));
+ List<Text> values = writer.getValue(new LongWritable(index));
if (index == 1 || index == 3 || index == 4) {
// this attribute should be ignored
assertNull("Attribute (" + index + ") should be ignored", values);