You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/01/13 09:01:42 UTC
svn commit: r898669 [1/3] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/clustering/canopy/
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/main/java/org/ap...
Author: jmannix
Date: Wed Jan 13 08:01:34 2010
New Revision: 898669
URL: http://svn.apache.org/viewvc?rev=898669&view=rev
Log:
MAHOUT-205
It's pretty invasive, so update patches as necessary.
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
lucene/mahout/trunk/math/pom.xml
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseMatrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseMatrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseVector.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Vector.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/PartialVectorGenerator.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/PartialVectorMerger.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Wed Jan 13 08:01:34 2010
@@ -36,6 +36,7 @@
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
import org.apache.mahout.math.SparseVector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.Map;
@@ -100,7 +101,7 @@
ItemPrefWritable.class,
ToUserVectorReducer.class,
LongWritable.class,
- SparseVector.class,
+ VectorWritable.class,
SequenceFileOutputFormat.class);
JobClient.runJob(toUserVectorConf);
@@ -113,7 +114,7 @@
IntWritable.class,
UserVectorToCooccurrenceReducer.class,
IntWritable.class,
- SparseVector.class,
+ VectorWritable.class,
MapFileOutputFormat.class);
JobClient.runJob(toCooccurrenceConf);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java Wed Jan 13 08:01:34 2010
@@ -39,6 +39,7 @@
import org.apache.mahout.common.FileLineIterable;
import org.apache.mahout.math.SparseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
@@ -50,7 +51,7 @@
public final class RecommenderMapper
extends MapReduceBase
- implements Mapper<LongWritable, SparseVector, LongWritable, RecommendedItemsWritable> {
+ implements Mapper<LongWritable, VectorWritable, LongWritable, RecommendedItemsWritable> {
static final String COOCCURRENCE_PATH = "cooccurrencePath";
static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
@@ -59,7 +60,7 @@
private int recommendationsPerUser;
private MapFilesMap<IntWritable,LongWritable> indexItemIDMap;
- private MapFilesMap<IntWritable,Vector> cooccurrenceColumnMap;
+ private MapFilesMap<IntWritable, VectorWritable> cooccurrenceColumnMap;
private Cache<IntWritable,Vector> cooccurrenceColumnCache;
private FastIDSet usersToRecommendFor;
@@ -71,7 +72,7 @@
Path itemIDIndexPath = new Path(jobConf.get(ITEMID_INDEX_PATH)).makeQualified(fs);
recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
indexItemIDMap = new MapFilesMap<IntWritable,LongWritable>(fs, itemIDIndexPath, new Configuration());
- cooccurrenceColumnMap = new MapFilesMap<IntWritable,Vector>(fs, cooccurrencePath, new Configuration());
+ cooccurrenceColumnMap = new MapFilesMap<IntWritable,VectorWritable>(fs, cooccurrencePath, new Configuration());
String usersFilePathString = jobConf.get(USERS_FILE);
if (usersFilePathString == null) {
usersToRecommendFor = null;
@@ -91,14 +92,14 @@
@Override
public void map(LongWritable userID,
- SparseVector userVector,
+ VectorWritable vectorWritable,
OutputCollector<LongWritable, RecommendedItemsWritable> output,
Reporter reporter) throws IOException {
if (usersToRecommendFor != null && !usersToRecommendFor.contains(userID.get())) {
return;
}
-
+ Vector userVector = vectorWritable.get();
Iterator<Vector.Element> userVectorIterator = userVector.iterateNonZero();
Vector recommendationVector = new SparseVector(Integer.MAX_VALUE, 1000);
while (userVectorIterator.hasNext()) {
@@ -152,26 +153,27 @@
private static class CooccurrenceCache implements Retriever<IntWritable,Vector> {
- private final MapFilesMap<IntWritable,Vector> map;
- private Vector columnVector;
+ private final MapFilesMap<IntWritable,VectorWritable> map;
+ private VectorWritable columnVector;
- private CooccurrenceCache(MapFilesMap<IntWritable,Vector> map) {
+ private CooccurrenceCache(MapFilesMap<IntWritable,VectorWritable> map) {
this.map = map;
- columnVector = new SparseVector(Integer.MAX_VALUE, 1000);
+ columnVector = new VectorWritable();
+ columnVector.set(new SparseVector(Integer.MAX_VALUE, 1000));
}
@Override
public Vector get(IntWritable key) throws TasteException {
Vector value;
try {
- value = map.get(key, columnVector);
+ value = map.get(key, columnVector).get();
} catch (IOException ioe) {
throw new TasteException(ioe);
}
if (value == null) {
return null;
}
- columnVector = new SparseVector(Integer.MAX_VALUE, 1000);
+ columnVector.set(new SparseVector(Integer.MAX_VALUE, 1000));
return value;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Wed Jan 13 08:01:34 2010
@@ -22,6 +22,7 @@
import org.apache.mahout.clustering.ClusterBase;
import org.apache.mahout.math.AbstractVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
@@ -54,13 +55,13 @@
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
- AbstractVector.writeVector(out, computeCentroid());
+ VectorWritable.writeVector(out, computeCentroid());
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
- this.setCenter(AbstractVector.readVector(in));
+ this.setCenter(VectorWritable.readVector(in));
this.setPointTotal(getCenter().clone());
this.setNumPoints(1);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Wed Jan 13 08:01:34 2010
@@ -25,6 +25,7 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
public class CanopyClusterer {
@@ -146,16 +147,18 @@
* @param collector an OutputCollector in which to emit the point
*/
public void emitPointToExistingCanopies(Vector point, List<Canopy> canopies,
- OutputCollector<Text, Vector> collector) throws IOException {
+ OutputCollector<Text, VectorWritable> collector) throws IOException {
double minDist = Double.MAX_VALUE;
Canopy closest = null;
boolean isCovered = false;
+ VectorWritable vw = new VectorWritable();
for (Canopy canopy : canopies) {
double dist = measure.distance(canopy.getCenter().getLengthSquared(),
canopy.getCenter(), point);
if (dist < t1) {
isCovered = true;
- collector.collect(new Text(canopy.getIdentifier()), point);
+ vw.set(point);
+ collector.collect(new Text(canopy.getIdentifier()), vw);
} else if (dist < minDist) {
minDist = dist;
closest = canopy;
@@ -163,8 +166,9 @@
}
// if the point is not contained in any canopies (due to canopy centroid
// clustering), emit the point to the closest covering canopy.
+ vw.set(point);
if (!isCovered) {
- collector.collect(new Text(closest.getIdentifier()), point);
+ collector.collect(new Text(closest.getIdentifier()), vw);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Wed Jan 13 08:01:34 2010
@@ -114,7 +114,7 @@
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
- runJob(input, output, measureClass, t1, t2, vectorClass);
+ runJob(input, output, measureClass, t1, t2);
} catch (OptionException e) {
log.error("Exception", e);
@@ -132,9 +132,9 @@
* @param t2 the T2 distance threshold
*/
public static void runJob(String input, String output,
- String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
- CanopyDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClassName, t1, t2, vectorClass);
- ClusterDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output, measureClassName, t1, t2, vectorClass);
+ String measureClassName, double t1, double t2) throws IOException {
+ CanopyDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClassName, t1, t2);
+ ClusterDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output, measureClassName, t1, t2);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed Jan 13 08:01:34 2010
@@ -39,6 +39,7 @@
import org.apache.mahout.math.Vector;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -110,7 +111,7 @@
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
- runJob(input, output, measureClass, t1, t2, vectorClass);
+ runJob(input, output, measureClass, t1, t2);
} catch (OptionException e) {
log.error("Exception", e);
CommandLineUtil.printHelp(group);
@@ -126,14 +127,11 @@
* @param measureClassName the DistanceMeasure class name
* @param t1 the T1 distance threshold
* @param t2 the T2 distance threshold
- * @param vectorClass the {@link Class} of Vector to use for the Map Output Key. Must be a concrete type
- * @see org.apache.mahout.math.SparseVector
- * @see org.apache.mahout.math.DenseVector
*/
public static void runJob(String input, String output,
- String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
+ String measureClassName, double t1, double t2) throws IOException {
log.info("Input: " + input + " Out: " + output + " Measure: " + measureClassName + " t1: " + t1
- + " t2: " + t2 + " Vector Class: " + vectorClass.getSimpleName());
+ + " t2: " + t2);
Configurable client = new JobClient();
JobConf conf = new JobConf(CanopyDriver.class);
conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
@@ -143,7 +141,7 @@
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(vectorClass);
+ conf.setMapOutputValueClass(VectorWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Canopy.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Wed Jan 13 08:01:34 2010
@@ -25,25 +25,26 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class CanopyMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, Vector> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
private final List<Canopy> canopies = new ArrayList<Canopy>();
- private OutputCollector<Text, Vector> outputCollector;
+ private OutputCollector<Text, VectorWritable> outputCollector;
private CanopyClusterer canopyClusterer;
@Override
- public void map(WritableComparable<?> key, Vector point,
- OutputCollector<Text, Vector> output, Reporter reporter) throws IOException {
+ public void map(WritableComparable<?> key, VectorWritable point,
+ OutputCollector<Text, VectorWritable> output, Reporter reporter) throws IOException {
outputCollector = output;
- canopyClusterer.addPointToCanopies(point, canopies);
+ canopyClusterer.addPointToCanopies(point.get(), canopies);
}
@Override
@@ -56,7 +57,9 @@
public void close() throws IOException {
for (Canopy canopy : canopies) {
Vector centroid = canopy.computeCentroid();
- outputCollector.collect(new Text("centroid"), centroid);
+ VectorWritable vw = new VectorWritable();
+ vw.set(centroid);
+ outputCollector.collect(new Text("centroid"), vw);
}
super.close();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Wed Jan 13 08:01:34 2010
@@ -24,6 +24,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
@@ -31,17 +32,17 @@
import java.util.List;
public class CanopyReducer extends MapReduceBase implements
- Reducer<Text, Vector, Text, Canopy> {
+ Reducer<Text, VectorWritable, Text, Canopy> {
private final List<Canopy> canopies = new ArrayList<Canopy>();
private CanopyClusterer canopyClusterer;
@Override
- public void reduce(Text key, Iterator<Vector> values,
+ public void reduce(Text key, Iterator<VectorWritable> values,
OutputCollector<Text, Canopy> output, Reporter reporter) throws IOException {
while (values.hasNext()) {
- Vector point = values.next();
+ Vector point = values.next().get();
canopyClusterer.addPointToCanopies(point, canopies);
}
for (Canopy canopy : canopies) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Wed Jan 13 08:01:34 2010
@@ -40,6 +40,7 @@
import org.apache.mahout.math.Vector;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -117,7 +118,7 @@
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
- runJob(points, canopies, output, measureClass, t1, t2, vectorClass);
+ runJob(points, canopies, output, measureClass, t1, t2);
} catch (OptionException e) {
log.error("Exception", e);
@@ -136,10 +137,9 @@
* @param measureClassName the DistanceMeasure class name
* @param t1 the T1 distance threshold
* @param t2 the T2 distance threshold
- * @param vectorClass The {@link Class} of Vector to use for the Output Value Class. Must be concrete.
*/
public static void runJob(String points, String canopies, String output,
- String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
+ String measureClassName, double t1, double t2) throws IOException {
Configurable client = new JobClient();
JobConf conf = new JobConf(ClusterDriver.class);
@@ -153,7 +153,7 @@
/*conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(SparseVector.class);*/
conf.setOutputKeyClass(Text.class);
- conf.setOutputValueClass(vectorClass);
+ conf.setOutputValueClass(VectorWritable.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(points));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Wed Jan 13 08:01:34 2010
@@ -28,21 +28,22 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class ClusterMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, Vector> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
private CanopyClusterer canopyClusterer;
private final List<Canopy> canopies = new ArrayList<Canopy>();
@Override
- public void map(WritableComparable<?> key, Vector point,
- OutputCollector<Text, Vector> output, Reporter reporter) throws IOException {
- canopyClusterer.emitPointToExistingCanopies(point, canopies, output);
+ public void map(WritableComparable<?> key, VectorWritable point,
+ OutputCollector<Text, VectorWritable> output, Reporter reporter) throws IOException {
+ canopyClusterer.emitPointToExistingCanopies(point.get(), canopies, output);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Wed Jan 13 08:01:34 2010
@@ -42,6 +42,7 @@
import org.apache.mahout.math.Vector;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -148,7 +149,7 @@
String modelFactory, int numModels, double alpha_0)
throws ClassNotFoundException, InstantiationException,
IllegalAccessException, IOException {
- DirichletState<Vector> state = createState(modelFactory, numModels, alpha_0);
+ DirichletState<VectorWritable> state = createState(modelFactory, numModels, alpha_0);
JobConf job = new JobConf(KMeansDriver.class);
Path outPath = new Path(output);
FileSystem fs = FileSystem.get(outPath.toUri(), job);
@@ -162,15 +163,15 @@
}
}
- public static DirichletState<Vector> createState(String modelFactory,
+ public static DirichletState<VectorWritable> createState(String modelFactory,
int numModels, double alpha_0) throws ClassNotFoundException,
InstantiationException, IllegalAccessException {
ClassLoader ccl = Thread.currentThread().getContextClassLoader();
Class<? extends ModelDistribution> cl =
ccl.loadClass(modelFactory).asSubclass(ModelDistribution.class);
- ModelDistribution<Vector> factory = (ModelDistribution<Vector>) cl
+ ModelDistribution<VectorWritable> factory = (ModelDistribution<VectorWritable>) cl
.newInstance();
- return new DirichletState<Vector>(factory,
+ return new DirichletState<VectorWritable>(factory,
numModels, alpha_0, 1, 1);
}
@@ -194,7 +195,7 @@
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(DirichletCluster.class);
conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(SparseVector.class);
+ conf.setMapOutputValueClass(VectorWritable.class);
FileInputFormat.setInputPaths(conf, new Path(input));
Path outPath = new Path(stateOut);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java Wed Jan 13 08:01:34 2010
@@ -32,17 +32,18 @@
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.TimesFunction;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
public class DirichletMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, Vector> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
- private DirichletState<Vector> state;
+ private DirichletState<VectorWritable> state;
@Override
- public void map(WritableComparable<?> key, Vector v,
- OutputCollector<Text, Vector> output, Reporter reporter) throws IOException {
+ public void map(WritableComparable<?> key, VectorWritable v,
+ OutputCollector<Text, VectorWritable> output, Reporter reporter) throws IOException {
// compute a normalized vector of probabilities that v is described by each model
Vector pi = normalizedProbabilities(state, v);
// then pick one model by sampling a Multinomial distribution based upon them
@@ -51,7 +52,7 @@
output.collect(new Text(String.valueOf(k)), v);
}
- public void configure(DirichletState<Vector> state) {
+ public void configure(DirichletState<VectorWritable> state) {
this.state = state;
}
@@ -61,14 +62,14 @@
state = getDirichletState(job);
}
- public static DirichletState<Vector> getDirichletState(JobConf job) {
+ public static DirichletState<VectorWritable> getDirichletState(JobConf job) {
String statePath = job.get(DirichletDriver.STATE_IN_KEY);
String modelFactory = job.get(DirichletDriver.MODEL_FACTORY_KEY);
String numClusters = job.get(DirichletDriver.NUM_CLUSTERS_KEY);
String alpha_0 = job.get(DirichletDriver.ALPHA_0_KEY);
try {
- DirichletState<Vector> state = DirichletDriver.createState(modelFactory,
+ DirichletState<VectorWritable> state = DirichletDriver.createState(modelFactory,
Integer.parseInt(numClusters), Double.parseDouble(alpha_0));
Path path = new Path(statePath);
FileSystem fs = FileSystem.get(path.toUri(), job);
@@ -78,11 +79,11 @@
job);
try {
Text key = new Text();
- DirichletCluster<Vector> cluster = new DirichletCluster<Vector>();
+ DirichletCluster<VectorWritable> cluster = new DirichletCluster<VectorWritable>();
while (reader.next(key, cluster)) {
int index = Integer.parseInt(key.toString());
state.getClusters().set(index, cluster);
- cluster = new DirichletCluster<Vector>();
+ cluster = new DirichletCluster<VectorWritable>();
}
} finally {
reader.close();
@@ -110,7 +111,7 @@
* @param v a Vector
* @return the Vector of probabilities
*/
- private static Vector normalizedProbabilities(DirichletState<Vector> state, Vector v) {
+ private static Vector normalizedProbabilities(DirichletState<VectorWritable> state, VectorWritable v) {
Vector pi = new DenseVector(state.getNumClusters());
double max = 0;
for (int k = 0; k < state.getNumClusters(); k++) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java Wed Jan 13 08:01:34 2010
@@ -25,38 +25,39 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.Iterator;
public class DirichletReducer extends MapReduceBase implements
- Reducer<Text, Vector, Text, DirichletCluster<Vector>> {
+ Reducer<Text, VectorWritable, Text, DirichletCluster<VectorWritable>> {
- private DirichletState<Vector> state;
+ private DirichletState<VectorWritable> state;
- private Model<Vector>[] newModels;
+ private Model<VectorWritable>[] newModels;
- public Model<Vector>[] getNewModels() {
+ public Model<VectorWritable>[] getNewModels() {
return newModels;
}
@Override
- public void reduce(Text key, Iterator<Vector> values,
- OutputCollector<Text, DirichletCluster<Vector>> output, Reporter reporter)
+ public void reduce(Text key, Iterator<VectorWritable> values,
+ OutputCollector<Text, DirichletCluster<VectorWritable>> output, Reporter reporter)
throws IOException {
int k = Integer.parseInt(key.toString());
- Model<Vector> model = newModels[k];
+ Model<VectorWritable> model = newModels[k];
while (values.hasNext()) {
- Vector v = values.next();
+ VectorWritable v = values.next();
model.observe(v);
}
model.computeParameters();
- DirichletCluster<Vector> cluster = state.getClusters().get(k);
+ DirichletCluster<VectorWritable> cluster = state.getClusters().get(k);
cluster.setModel(model);
output.collect(key, cluster);
}
- public void configure(DirichletState<Vector> state) {
+ public void configure(DirichletState<VectorWritable> state) {
this.state = state;
this.newModels = state.getModelFactory().sampleFromPosterior(state.getModels());
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Wed Jan 13 08:01:34 2010
@@ -20,6 +20,7 @@
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
/**
* An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
@@ -27,11 +28,11 @@
* asymmetrical regions to be covered by a model.
*/
public class AsymmetricSampledNormalDistribution implements
- ModelDistribution<Vector> {
+ ModelDistribution<VectorWritable> {
@Override
- public Model<Vector>[] sampleFromPrior(int howMany) {
- Model<Vector>[] result = new AsymmetricSampledNormalModel[howMany];
+ public Model<VectorWritable>[] sampleFromPrior(int howMany) {
+ Model<VectorWritable>[] result = new AsymmetricSampledNormalModel[howMany];
for (int i = 0; i < howMany; i++) {
double[] m = {UncommonDistributions.rNorm(0, 1),
UncommonDistributions.rNorm(0, 1)};
@@ -45,8 +46,8 @@
}
@Override
- public Model<Vector>[] sampleFromPosterior(Model<Vector>[] posterior) {
- Model<Vector>[] result = new AsymmetricSampledNormalModel[posterior.length];
+ public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
+ Model<VectorWritable>[] result = new AsymmetricSampledNormalModel[posterior.length];
for (int i = 0; i < posterior.length; i++) {
AsymmetricSampledNormalModel m = (AsymmetricSampledNormalModel) posterior[i];
result[i] = m.sample();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java Wed Jan 13 08:01:34 2010
@@ -17,15 +17,15 @@
package org.apache.mahout.clustering.dirichlet.models;
-import org.apache.mahout.math.AbstractVector;
import org.apache.mahout.math.SquareRootFunction;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-public class AsymmetricSampledNormalModel implements Model<Vector> {
+public class AsymmetricSampledNormalModel implements Model<VectorWritable> {
private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
@@ -72,7 +72,8 @@
}
@Override
- public void observe(Vector x) {
+ public void observe(VectorWritable v) {
+ Vector x = v.get();
s0++;
if (s1 == null) {
s1 = x.clone();
@@ -118,7 +119,8 @@
}
@Override
- public double pdf(Vector x) {
+ public double pdf(VectorWritable v) {
+ Vector x = v.get();
// return the product of the two component pdfs
if (x.getNumNondefaultElements() != 2) {
throw new IllegalArgumentException();
@@ -156,19 +158,19 @@
@Override
public void readFields(DataInput in) throws IOException {
- this.mean = AbstractVector.readVector(in);
- this.stdDev = AbstractVector.readVector(in);
+ this.mean = VectorWritable.readVector(in);
+ this.stdDev = VectorWritable.readVector(in);
this.s0 = in.readInt();
- this.s1 = AbstractVector.readVector(in);
- this.s2 = AbstractVector.readVector(in);
+ this.s1 = VectorWritable.readVector(in);
+ this.s2 = VectorWritable.readVector(in);
}
@Override
public void write(DataOutput out) throws IOException {
- AbstractVector.writeVector(out, mean);
- AbstractVector.writeVector(out, stdDev);
+ VectorWritable.writeVector(out, mean);
+ VectorWritable.writeVector(out, stdDev);
out.writeInt(s0);
- AbstractVector.writeVector(out, s1);
- AbstractVector.writeVector(out, s2);
+ VectorWritable.writeVector(out, s1);
+ VectorWritable.writeVector(out, s2);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java Wed Jan 13 08:01:34 2010
@@ -17,15 +17,15 @@
package org.apache.mahout.clustering.dirichlet.models;
-import org.apache.mahout.math.AbstractVector;
import org.apache.mahout.math.SquareRootFunction;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-public class NormalModel implements Model<Vector> {
+public class NormalModel implements Model<VectorWritable> {
private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
@@ -75,17 +75,18 @@
}
@Override
- public void observe(Vector x) {
+ public void observe(VectorWritable x) {
s0++;
+ Vector v = x.get();
if (s1 == null) {
- s1 = x.clone();
+ s1 = v.clone();
} else {
- s1 = s1.plus(x);
+ s1 = s1.plus(v);
}
if (s2 == null) {
- s2 = x.times(x);
+ s2 = v.times(v);
} else {
- s2 = s2.plus(x.times(x));
+ s2 = s2.plus(v.times(v));
}
}
@@ -106,7 +107,8 @@
}
@Override
- public double pdf(Vector x) {
+ public double pdf(VectorWritable v) {
+ Vector x = v.get();
double sd2 = stdDev * stdDev;
double exp = -(x.dot(x) - 2 * x.dot(mean) + mean.dot(mean)) / (2 * sd2);
double ex = Math.exp(exp);
@@ -133,19 +135,19 @@
@Override
public void readFields(DataInput in) throws IOException {
- this.mean = AbstractVector.readVector(in);
+ this.mean = VectorWritable.readVector(in);
this.stdDev = in.readDouble();
this.s0 = in.readInt();
- this.s1 = AbstractVector.readVector(in);
- this.s2 = AbstractVector.readVector(in);
+ this.s1 = VectorWritable.readVector(in);
+ this.s2 = VectorWritable.readVector(in);
}
@Override
public void write(DataOutput out) throws IOException {
- AbstractVector.writeVector(out, mean);
+ VectorWritable.writeVector(out, mean);
out.writeDouble(stdDev);
out.writeInt(s0);
- AbstractVector.writeVector(out, s1);
- AbstractVector.writeVector(out, s2);
+ VectorWritable.writeVector(out, s1);
+ VectorWritable.writeVector(out, s2);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Wed Jan 13 08:01:34 2010
@@ -19,16 +19,17 @@
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
/**
* An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
* Normal Distribution
*/
-public class NormalModelDistribution implements ModelDistribution<Vector> {
+public class NormalModelDistribution implements ModelDistribution<VectorWritable> {
@Override
- public Model<Vector>[] sampleFromPrior(int howMany) {
- Model<Vector>[] result = new NormalModel[howMany];
+ public Model<VectorWritable>[] sampleFromPrior(int howMany) {
+ Model<VectorWritable>[] result = new NormalModel[howMany];
for (int i = 0; i < howMany; i++) {
result[i] = new NormalModel(new DenseVector(2), 1);
}
@@ -36,8 +37,8 @@
}
@Override
- public Model<Vector>[] sampleFromPosterior(Model<Vector>[] posterior) {
- Model<Vector>[] result = new NormalModel[posterior.length];
+ public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
+ Model<VectorWritable>[] result = new NormalModel[posterior.length];
for (int i = 0; i < posterior.length; i++) {
NormalModel m = (NormalModel) posterior[i];
result[i] = m.sample();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java Wed Jan 13 08:01:34 2010
@@ -20,6 +20,7 @@
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
/**
* An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
@@ -28,8 +29,8 @@
public class SampledNormalDistribution extends NormalModelDistribution {
@Override
- public Model<Vector>[] sampleFromPrior(int howMany) {
- Model<Vector>[] result = new SampledNormalModel[howMany];
+ public Model<VectorWritable>[] sampleFromPrior(int howMany) {
+ Model<VectorWritable>[] result = new SampledNormalModel[howMany];
for (int i = 0; i < howMany; i++) {
double[] m = {UncommonDistributions.rNorm(0, 1),
UncommonDistributions.rNorm(0, 1)};
@@ -40,8 +41,8 @@
}
@Override
- public Model<Vector>[] sampleFromPosterior(Model<Vector>[] posterior) {
- Model<Vector>[] result = new SampledNormalModel[posterior.length];
+ public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
+ Model<VectorWritable>[] result = new SampledNormalModel[posterior.length];
for (int i = 0; i < posterior.length; i++) {
SampledNormalModel m = (SampledNormalModel) posterior[i];
result[i] = m.sample();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Wed Jan 13 08:01:34 2010
@@ -25,21 +25,22 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class FuzzyKMeansClusterMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, FuzzyKMeansOutput> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansOutput> {
private final List<SoftCluster> clusters = new ArrayList<SoftCluster>();
private FuzzyKMeansClusterer clusterer;
@Override
- public void map(WritableComparable<?> key, Vector point,
+ public void map(WritableComparable<?> key, VectorWritable point,
OutputCollector<Text, FuzzyKMeansOutput> output, Reporter reporter) throws IOException {
- clusterer.outputPointWithClusterProbabilities(key.toString(), point, clusters, output);
+ clusterer.outputPointWithClusterProbabilities(key.toString(), point.get(), clusters, output);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Wed Jan 13 08:01:34 2010
@@ -45,6 +45,7 @@
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -179,10 +180,10 @@
}
if (cmdLine.hasOption(clusteringOpt)) {
- runClustering(input, clusters, output, measureClass, convergenceDelta, numMapTasks, m, vectorClass);
+ runClustering(input, clusters, output, measureClass, convergenceDelta, numMapTasks, m);
} else {
runJob(input, clusters, output, measureClass, convergenceDelta,
- maxIterations, numMapTasks, numReduceTasks, m, vectorClass);
+ maxIterations, numMapTasks, numReduceTasks, m);
}
@@ -206,11 +207,10 @@
* @param numMapTasks the number of mapper tasks
* @param numReduceTasks the number of reduce tasks
* @param m the fuzzification factor, see http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
- * @param vectorClass the {@link org.apache.mahout.math.Vector} implementation to use
*/
public static void runJob(String input, String clustersIn, String output,
String measureClass, double convergenceDelta, int maxIterations,
- int numMapTasks, int numReduceTasks, float m, Class<? extends Vector> vectorClass) {
+ int numMapTasks, int numReduceTasks, float m) {
boolean converged = false;
int iteration = 0;
@@ -233,7 +233,7 @@
log.info("Clustering ");
runClustering(input, clustersIn, output + File.separator + "points",
- measureClass, convergenceDelta, numMapTasks, m, vectorClass);
+ measureClass, convergenceDelta, numMapTasks, m);
}
/**
@@ -304,13 +304,13 @@
*/
private static void runClustering(String input, String clustersIn,
String output, String measureClass, double convergenceDelta,
- int numMapTasks, float m, Class<? extends Vector> vectorClass) {
+ int numMapTasks, float m) {
JobConf conf = new JobConf(FuzzyKMeansDriver.class);
conf.setJobName("Fuzzy K Means Clustering");
conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(vectorClass);
+ conf.setMapOutputValueClass(VectorWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(FuzzyKMeansOutput.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansInfo.java Wed Jan 13 08:01:34 2010
@@ -18,8 +18,8 @@
package org.apache.mahout.clustering.fuzzykmeans;
import org.apache.hadoop.io.Writable;
-import org.apache.mahout.math.AbstractVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
@@ -65,12 +65,12 @@
@Override
public void write(DataOutput out) throws IOException {
out.writeDouble(probability);
- AbstractVector.writeVector(out, pointTotal);
+ VectorWritable.writeVector(out, pointTotal);
}
@Override
public void readFields(DataInput in) throws IOException {
this.probability = in.readDouble();
- this.pointTotal = AbstractVector.readVector(in);
+ this.pointTotal = VectorWritable.readVector(in);
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Wed Jan 13 08:01:34 2010
@@ -106,7 +106,7 @@
String vectorClassName = cmdLine.getValue(vectorClassOpt).toString();
Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
runJob(input, clusters, output, measureClass, convergenceDelta,
- maxIterations, numMapTasks, numReduceTasks, doCanopy, m, vectorClass);
+ maxIterations, numMapTasks, numReduceTasks, doCanopy, m);
} catch (OptionException e) {
log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
@@ -128,18 +128,18 @@
*/
public static void runJob(String input, String clustersIn, String output,
String measureClass, double convergenceDelta, int maxIterations,
- int numMapTasks, int numReduceTasks, boolean doCanopy, float m, Class<? extends Vector> vectorClass)
+ int numMapTasks, int numReduceTasks, boolean doCanopy, float m)
throws IOException {
// run canopy to find initial clusters
if (doCanopy) {
CanopyDriver.runJob(input, clustersIn, ManhattanDistanceMeasure.class
- .getName(), 100.1, 50.1, vectorClass);
+ .getName(), 100.1, 50.1);
}
// run fuzzy k-means
FuzzyKMeansDriver.runJob(input, clustersIn, output, measureClass,
- convergenceDelta, maxIterations, numMapTasks, numReduceTasks, m, vectorClass);
+ convergenceDelta, maxIterations, numMapTasks, numReduceTasks, m);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Wed Jan 13 08:01:34 2010
@@ -25,6 +25,7 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -33,7 +34,7 @@
import java.util.List;
public class FuzzyKMeansMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, FuzzyKMeansInfo> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansInfo> {
private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansMapper.class);
@@ -41,9 +42,9 @@
private FuzzyKMeansClusterer clusterer;
@Override
- public void map(WritableComparable<?> key, Vector point,
+ public void map(WritableComparable<?> key, VectorWritable point,
OutputCollector<Text, FuzzyKMeansInfo> output, Reporter reporter) throws IOException {
- clusterer.emitPointProbToCluster(point, clusters, output);
+ clusterer.emitPointProbToCluster(point.get(), clusters, output);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Wed Jan 13 08:01:34 2010
@@ -18,10 +18,7 @@
package org.apache.mahout.clustering.fuzzykmeans;
import org.apache.hadoop.io.Writable;
-import org.apache.mahout.math.AbstractVector;
-import org.apache.mahout.math.SparseVector;
-import org.apache.mahout.math.SquareRootFunction;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.*;
import java.io.DataInput;
import java.io.DataOutput;
@@ -91,14 +88,14 @@
out.writeInt(clusterId);
out.writeBoolean(converged);
Vector vector = computeCentroid();
- AbstractVector.writeVector(out, vector);
+ VectorWritable.writeVector(out, vector);
}
@Override
public void readFields(DataInput in) throws IOException {
clusterId = in.readInt();
converged = in.readBoolean();
- center = AbstractVector.readVector(in);
+ center = VectorWritable.readVector(in);
this.pointProbSum = 0;
this.weightedPointTotal = center.like();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Wed Jan 13 08:01:34 2010
@@ -21,6 +21,7 @@
import org.apache.mahout.math.SquareRootFunction;
import org.apache.mahout.math.Vector;
import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
@@ -94,14 +95,14 @@
public void write(DataOutput out) throws IOException {
super.write(out);
out.writeBoolean(converged);
- AbstractVector.writeVector(out, computeCentroid());
+ VectorWritable.writeVector(out, computeCentroid());
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
this.converged = in.readBoolean();
- this.setCenter(AbstractVector.readVector(in));
+ this.setCenter(VectorWritable.readVector(in));
this.setNumPoints(0);
this.setPointTotal(getCenter().like());
this.pointSquaredTotal = getCenter().like();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Wed Jan 13 08:01:34 2010
@@ -26,21 +26,22 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class KMeansClusterMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, Text> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, Text> {
private final List<Cluster> clusters = new ArrayList<Cluster>();
private KMeansClusterer clusterer;
@Override
- public void map(WritableComparable<?> key, Vector point,
+ public void map(WritableComparable<?> key, VectorWritable point,
OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
- this.clusterer.outputPointWithClusterInfo(point, clusters, output);
+ this.clusterer.outputPointWithClusterInfo(point.get(), clusters, output);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Wed Jan 13 08:01:34 2010
@@ -41,6 +41,7 @@
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -158,7 +159,7 @@
clusters = RandomSeedGenerator
.buildRandom(input, clusters, Integer.parseInt(cmdLine.getValue(kOpt).toString())).toString();
}
- runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, vectorClass);
+ runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks);
} catch (OptionException e) {
log.error("Exception", e);
CommandLineUtil.printHelp(group);
@@ -177,13 +178,13 @@
* @param numReduceTasks the number of reducers
*/
public static void runJob(String input, String clustersIn, String output, String measureClass,
- double convergenceDelta, int maxIterations, int numReduceTasks, Class<? extends Vector> vectorClass) {
+ double convergenceDelta, int maxIterations, int numReduceTasks) {
// iterate until the clusters converge
String delta = Double.toString(convergenceDelta);
if (log.isInfoEnabled()) {
log.info("Input: " + input + " Clusters In: " + clustersIn + " Out: " + output + " Distance: " + measureClass);
log.info("convergence: " + convergenceDelta + " max Iterations: " + maxIterations + " num Reduce Tasks: "
- + numReduceTasks + " Input Vectors: " + vectorClass.getName());
+ + numReduceTasks + " Input Vectors: " + VectorWritable.class.getName());
}
boolean converged = false;
int iteration = 0;
@@ -198,7 +199,7 @@
}
// now actually cluster the points
log.info("Clustering ");
- runClustering(input, clustersIn, output + DEFAULT_OUTPUT_DIRECTORY, measureClass, delta, vectorClass);
+ runClustering(input, clustersIn, output + DEFAULT_OUTPUT_DIRECTORY, measureClass, delta);
}
/**
@@ -255,18 +256,18 @@
* @param convergenceDelta the convergence delta value
*/
private static void runClustering(String input, String clustersIn, String output, String measureClass,
- String convergenceDelta, Class<? extends Vector> vectorClass) {
+ String convergenceDelta) {
if (log.isInfoEnabled()) {
log.info("Running Clustering");
log.info("Input: " + input + " Clusters In: " + clustersIn + " Out: " + output + " Distance: " + measureClass);
- log.info("convergence: " + convergenceDelta + " Input Vectors: " + vectorClass.getName());
+ log.info("convergence: " + convergenceDelta + " Input Vectors: " + VectorWritable.class.getName());
}
JobConf conf = new JobConf(KMeansDriver.class);
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(vectorClass);
+ conf.setMapOutputValueClass(VectorWritable.class);
conf.setOutputKeyClass(Text.class);
// the output is the cluster id
conf.setOutputValueClass(Text.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansInfo.java Wed Jan 13 08:01:34 2010
@@ -18,8 +18,8 @@
package org.apache.mahout.clustering.kmeans;
import org.apache.hadoop.io.Writable;
-import org.apache.mahout.math.AbstractVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInput;
import java.io.DataOutput;
@@ -49,12 +49,12 @@
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(points);
- AbstractVector.writeVector(out, pointTotal);
+ VectorWritable.writeVector(out, pointTotal);
}
@Override
public void readFields(DataInput in) throws IOException {
this.points = in.readInt();
- this.pointTotal = AbstractVector.readVector(in);
+ this.pointTotal = VectorWritable.readVector(in);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Wed Jan 13 08:01:34 2010
@@ -25,22 +25,23 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class KMeansMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Vector, Text, KMeansInfo> {
+ Mapper<WritableComparable<?>, VectorWritable, Text, KMeansInfo> {
private KMeansClusterer clusterer;
private final List<Cluster> clusters = new ArrayList<Cluster>();
@Override
- public void map(WritableComparable<?> key, Vector point,
+ public void map(WritableComparable<?> key, VectorWritable point,
OutputCollector<Text, KMeansInfo> output, Reporter reporter)
throws IOException {
- this.clusterer.emitPointToNearestCluster(point, this.clusters, output);
+ this.clusterer.emitPointToNearestCluster(point.get(), this.clusters, output);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Wed Jan 13 08:01:34 2010
@@ -25,6 +25,7 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.math.Vector;
import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -67,7 +68,7 @@
if (newFile) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(input), conf);
Writable key = (Writable) reader.getKeyClass().newInstance();
- Vector value = (Vector) reader.getValueClass().newInstance();
+ VectorWritable value = (VectorWritable) reader.getValueClass().newInstance();
SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, Cluster.class);
Random random = RandomUtils.getRandom();
@@ -75,8 +76,8 @@
List<Cluster> chosenClusters = new ArrayList<Cluster>(k);
int nextClusterId = 0;
while (reader.next(key, value)) {
- Cluster newCluster = new Cluster(value, nextClusterId++);
- newCluster.addPoint(value);
+ Cluster newCluster = new Cluster(value.get(), nextClusterId++);
+ newCluster.addPoint(value.get());
Text newText = new Text(key.toString());
int currentSize = chosenTexts.size();
if (currentSize < k) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Wed Jan 13 08:01:34 2010
@@ -21,12 +21,7 @@
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;
import org.apache.mahout.clustering.ClusterBase;
-import org.apache.mahout.math.AbstractVector;
-import org.apache.mahout.math.CardinalityException;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.JsonVectorAdapter;
-import org.apache.mahout.math.PlusFunction;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.*;
import java.io.DataInput;
import java.io.DataOutput;
@@ -195,21 +190,21 @@
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
- this.setCenter(AbstractVector.readVector(in));
+ this.setCenter(VectorWritable.readVector(in));
int numpoints = in.readInt();
this.boundPoints = new ArrayList<Vector>();
for (int i = 0; i < numpoints; i++) {
- this.boundPoints.add(AbstractVector.readVector(in));
+ this.boundPoints.add(VectorWritable.readVector(in));
}
}
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
- AbstractVector.writeVector(out, computeCentroid());
+ VectorWritable.writeVector(out, computeCentroid());
out.writeInt(boundPoints.size());
for (Vector v : boundPoints) {
- AbstractVector.writeVector(out, v);
+ VectorWritable.writeVector(out, v);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java Wed Jan 13 08:01:34 2010
@@ -25,6 +25,7 @@
import org.apache.mahout.common.parameters.ClassParameter;
import org.apache.mahout.common.parameters.Parameter;
import org.apache.mahout.common.parameters.PathParameter;
+import org.apache.mahout.math.VectorWritable;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
@@ -63,7 +64,7 @@
try {
if (weightsFile.get() != null) {
FileSystem fs = FileSystem.get(weightsFile.get().toUri(), jobConf);
- Vector weights = (Vector) vectorClass.get().newInstance();
+ VectorWritable weights = (VectorWritable) vectorClass.get().newInstance();
if (!fs.exists(weightsFile.get())) {
throw new FileNotFoundException(weightsFile.get().toString());
}
@@ -73,7 +74,7 @@
} finally {
in.close();
}
- this.weights = weights;
+ this.weights = weights.get();
}
} catch (IOException e) {
throw new IllegalStateException(e);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java Wed Jan 13 08:01:34 2010
@@ -24,6 +24,7 @@
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.math.SparseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import java.io.File;
import java.io.IOException;
@@ -33,13 +34,12 @@
private ClusteringTestUtils() {
}
- public static void writePointsToFile(List<Vector> points, String fileName, FileSystem fs, Configuration conf)
+ public static void writePointsToFile(List<VectorWritable> points, String fileName, FileSystem fs, Configuration conf)
throws IOException {
Path path = new Path(fileName);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, LongWritable.class, SparseVector.class);
+ SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);
long recNum = 0;
- for (Vector point : points) {
- //point.write(dataOut);
+ for (VectorWritable point : points) {
writer.append(new LongWritable(recNum++), point);
}
writer.close();