You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/01/13 09:01:42 UTC
svn commit: r898669 [2/3] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/ap...

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Wed Jan 13 08:01:34 2010
@@ -34,6 +34,7 @@
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.common.distance.UserDefinedDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
 
 import java.io.File;
 import java.util.ArrayList;
@@ -64,13 +65,13 @@
     super(name);
   }
 
-  private static List<Vector> getPoints(double[][] raw) {
-    List<Vector> points = new ArrayList<Vector>();
+  private static List<VectorWritable> getPoints(double[][] raw) {
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     int i = 0;
     for (double[] fr : raw) {
       Vector vec = new SparseVector(String.valueOf(i++), fr.length);
       vec.assign(fr);
-      points.add(vec);
+      points.add(new VectorWritable(vec));
     }
     return points;
   }
@@ -170,7 +171,7 @@
    * @return the List<Canopy> created
    */
   static List<Canopy> populateCanopies(DistanceMeasure measure,
-                                       List<Vector> points, double t1, double t2) {
+                                       List<VectorWritable> points, double t1, double t2) {
     List<Canopy> canopies = new ArrayList<Canopy>();
     /**
      * Reference Implementation: Given a distance metric, one can create
@@ -184,13 +185,13 @@
      */
     int nextCanopyId = 0;
     while (!points.isEmpty()) {
-      Iterator<Vector> ptIter = points.iterator();
-      Vector p1 = ptIter.next();
+      Iterator<VectorWritable> ptIter = points.iterator();
+      Vector p1 = ptIter.next().get();
       ptIter.remove();
       Canopy canopy = new VisibleCanopy(p1, nextCanopyId++);
       canopies.add(canopy);
       while (ptIter.hasNext()) {
-        Vector p2 = ptIter.next();
+        Vector p2 = ptIter.next().get();
         double dist = measure.distance(p1, p2);
         // Put all points that are within distance threshold T1 into the canopy
         if (dist < t1) {
@@ -251,12 +252,12 @@
 
   /** Story: User can cluster points without instantiating them all in memory at once */
   public void testIterativeManhattan() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     CanopyClusterer clusterer = new CanopyClusterer(new ManhattanDistanceMeasure(), 3.1, 2.1);
 
     List<Canopy> canopies = new ArrayList<Canopy>();
-    for (Vector point : points) {
-      clusterer.addPointToCanopies(point, canopies);
+    for (VectorWritable point : points) {
+      clusterer.addPointToCanopies(point.get(), canopies);
     }
 
     System.out.println("testIterativeManhattan");
@@ -266,12 +267,12 @@
 
   /** Story: User can cluster points without instantiating them all in memory at once */
   public void testIterativeEuclidean() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     CanopyClusterer clusterer = new CanopyClusterer(new EuclideanDistanceMeasure(), 3.1, 2.1);
 
     List<Canopy> canopies = new ArrayList<Canopy>();
-    for (Vector point : points) {
-      clusterer.addPointToCanopies(point, canopies);
+    for (VectorWritable point : points) {
+      clusterer.addPointToCanopies(point.get(), canopies);
     }
 
     System.out.println("testIterativeEuclidean");
@@ -291,21 +292,21 @@
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1));
     mapper.configure(conf);
     
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
-    List<Vector> points = getPoints(raw);
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
     mapper.close();
     assertEquals("Number of map results", 1, collector.getData().size());
     // now verify the output
-    List<Vector> data = collector.getValue("centroid");
+    List<VectorWritable> data = collector.getValue("centroid");
     assertEquals("Number of centroids", 3, data.size());
     for (int i = 0; i < data.size(); i++) {
       assertEquals("Centroid error",
-          manhattanCentroids.get(i).asFormatString(), data.get(i)
-              .asFormatString());
+          manhattanCentroids.get(i).asFormatString(),
+          data.get(i).get().asFormatString());
     }
   }
 
@@ -321,21 +322,21 @@
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1));
     mapper.configure(conf);
     
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
-    List<Vector> points = getPoints(raw);
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
     mapper.close();
     assertEquals("Number of map results", 1, collector.getData().size());
     // now verify the output
-    List<Vector> data = collector.getValue("centroid");
+    List<VectorWritable> data = collector.getValue("centroid");
     assertEquals("Number of centroids", 3, data.size());
     for (int i = 0; i < data.size(); i++) {
       assertEquals("Centroid error",
-          euclideanCentroids.get(i).asFormatString(), data.get(i)
-              .asFormatString());
+          euclideanCentroids.get(i).asFormatString(),
+          data.get(i).get().asFormatString());
     }
   }
 
@@ -352,7 +353,7 @@
     reducer.configure(conf);
     
     DummyOutputCollector<Text, Canopy> collector = new DummyOutputCollector<Text, Canopy>();
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     reducer.reduce(new Text("centroid"), points.iterator(), collector, null);
     reducer.close();
     Set<String> keys = collector.getKeys();
@@ -378,7 +379,7 @@
     reducer.configure(conf);
     
     DummyOutputCollector<Text, Canopy> collector = new DummyOutputCollector<Text, Canopy>();
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     reducer.reduce(new Text("centroid"), points.iterator(), collector, null);
     reducer.close();
     Set<String> keys = collector.getKeys();
@@ -393,7 +394,7 @@
 
   /** Story: User can produce final canopy centers using a Hadoop map/reduce job and a ManhattanDistanceMeasure. */
   public void testCanopyGenManhattanMR() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -402,8 +403,7 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/file1", fs, job);
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, job);
     // now run the Canopy Driver
-    CanopyDriver.runJob("testdata", "output/canopies",
-        ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+    CanopyDriver.runJob("testdata", "output/canopies", ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
 
     // verify output from sequence file
     Path path = new Path("output/canopies/part-00000");
@@ -427,7 +427,7 @@
 
   /** Story: User can produce final canopy centers using a Hadoop map/reduce job and a EuclideanDistanceMeasure. */
   public void testCanopyGenEuclideanMR() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -436,8 +436,7 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/file1", fs, job);
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, job);
     // now run the Canopy Driver
-    CanopyDriver.runJob("testdata", "output/canopies",
-        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+    CanopyDriver.runJob("testdata", "output/canopies", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
 
     // verify output from sequence file
     Path path = new Path("output/canopies/part-00000");
@@ -467,25 +466,25 @@
     mapper.configure(conf);
     
     List<Canopy> canopies = new ArrayList<Canopy>();
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
     int nextCanopyId = 0;
     for (Vector centroid : manhattanCentroids) {
       canopies.add(new Canopy(centroid, nextCanopyId++));
     }
     mapper.config(canopies);
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
-    Map<String, List<Vector>> data = collector.getData();
+    Map<String, List<VectorWritable>> data = collector.getData();
     assertEquals("Number of map results", canopies.size(), data.size());
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
       String key = stringListEntry.getKey();
       Canopy canopy = findCanopy(key, canopies);
-      List<Vector> pts = stringListEntry.getValue();
-      for (Vector ptDef : pts) {
-        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef));
+      List<VectorWritable> pts = stringListEntry.getValue();
+      for (VectorWritable ptDef : pts) {
+        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef.get()));
       }
     }
   }
@@ -509,25 +508,25 @@
     mapper.configure(conf);
     
     List<Canopy> canopies = new ArrayList<Canopy>();
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
     int nextCanopyId = 0;
     for (Vector centroid : euclideanCentroids) {
       canopies.add(new Canopy(centroid, nextCanopyId++));
     }
     mapper.config(canopies);
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
-    Map<String, List<Vector>> data = collector.getData();
+    Map<String, List<VectorWritable>> data = collector.getData();
     assertEquals("Number of map results", canopies.size(), data.size());
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
       String key = stringListEntry.getKey();
       Canopy canopy = findCanopy(key, canopies);
-      List<Vector> pts = stringListEntry.getValue();
-      for (Vector ptDef : pts) {
-        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef));
+      List<VectorWritable> pts = stringListEntry.getValue();
+      for (VectorWritable ptDef : pts) {
+        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef.get()));
       }
     }
   }
@@ -542,36 +541,36 @@
     mapper.configure(conf);
     
     List<Canopy> canopies = new ArrayList<Canopy>();
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
     int nextCanopyId = 0;
     for (Vector centroid : manhattanCentroids) {
       canopies.add(new Canopy(centroid, nextCanopyId++));
     }
     mapper.config(canopies);
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
-    Map<String, List<Vector>> data = collector.getData();
+    Map<String, List<VectorWritable>> data = collector.getData();
     assertEquals("Number of map results", canopies.size(), data.size());
 
     // reduce the data
-    Reducer<Text, Vector, Text, Vector> reducer = new IdentityReducer<Text, Vector>();
-    collector = new DummyOutputCollector<Text, Vector>();
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
+    Reducer<Text, VectorWritable, Text, VectorWritable> reducer = new IdentityReducer<Text, VectorWritable>();
+    collector = new DummyOutputCollector<Text, VectorWritable>();
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
       reducer.reduce(new Text(stringListEntry.getKey()), stringListEntry
           .getValue().iterator(), collector, null);
     }
 
     // check the output
     data = collector.getData();
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
       String key = stringListEntry.getKey();
       Canopy canopy = findCanopy(key, canopies);
-      List<Vector> pts = stringListEntry.getValue();
-      for (Vector ptDef : pts) {
-        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef));
+      List<VectorWritable> pts = stringListEntry.getValue();
+      for (VectorWritable ptDef : pts) {
+        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef.get()));
       }
     }
   }
@@ -586,43 +585,43 @@
     mapper.configure(conf);
     
     List<Canopy> canopies = new ArrayList<Canopy>();
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
     int nextCanopyId = 0;
     for (Vector centroid : euclideanCentroids) {
       canopies.add(new Canopy(centroid, nextCanopyId++));
     }
     mapper.config(canopies);
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     // map the data
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       mapper.map(new Text(), point, collector, null);
     }
-    Map<String, List<Vector>> data = collector.getData();
+    Map<String, List<VectorWritable>> data = collector.getData();
 
     // reduce the data
-    Reducer<Text, Vector, Text, Vector> reducer = new IdentityReducer<Text, Vector>();
-    collector = new DummyOutputCollector<Text, Vector>();
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
-      reducer.reduce(new Text(stringListEntry.getKey()), stringListEntry
-          .getValue().iterator(), collector, null);
+    Reducer<Text, VectorWritable, Text, VectorWritable> reducer = new IdentityReducer<Text, VectorWritable>();
+    collector = new DummyOutputCollector<Text, VectorWritable>();
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
+      reducer.reduce(new Text(stringListEntry.getKey()),
+          stringListEntry.getValue().iterator(), collector, null);
     }
 
     // check the output
     data = collector.getData();
     assertEquals("Number of map results", canopies.size(), data.size());
-    for (Map.Entry<String, List<Vector>> stringListEntry : data.entrySet()) {
+    for (Map.Entry<String, List<VectorWritable>> stringListEntry : data.entrySet()) {
       String key = stringListEntry.getKey();
       Canopy canopy = findCanopy(key, canopies);
-      List<Vector> pts = stringListEntry.getValue();
-      for (Vector ptDef : pts) {
-        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef));
+      List<VectorWritable> pts = stringListEntry.getValue();
+      for (VectorWritable ptDef : pts) {
+        assertTrue("Point not in canopy", mapper.canopyCovers(canopy, ptDef.get()));
       }
     }
   }
 
   /** Story: User can produce final point clustering using a Hadoop map/reduce job and a ManhattanDistanceMeasure. */
   public void testClusteringManhattanMR() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -632,7 +631,7 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, conf);
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-        ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+        ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
     //TODO: change
     Path path = new Path("output/clusters/part-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
@@ -642,10 +641,10 @@
       count++;
     }*/
     Text txt = new Text();
-    SparseVector vector = new SparseVector();
+    VectorWritable vector = new VectorWritable();
     while (reader.next(txt, vector)) {
       count++;
-      System.out.println("Txt: " + txt + " Vec: " + vector.asFormatString());
+      System.out.println("Txt: " + txt + " Vec: " + vector.get().asFormatString());
     }
     // the point [3.0,3.0] is covered by both canopies
     assertEquals("number of points", 2 + 2 * points.size(), count);
@@ -654,7 +653,7 @@
 
   /** Story: User can produce final point clustering using a Hadoop map/reduce job and a EuclideanDistanceMeasure. */
   public void testClusteringEuclideanMR() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -664,7 +663,7 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, conf);
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
-        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
     Path path = new Path("output/clusters/part-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     int count = 0;
@@ -673,7 +672,7 @@
       count++;
     }*/
     Text txt = new Text();
-    SparseVector can = new SparseVector();
+    VectorWritable can = new VectorWritable();
     while (reader.next(txt, can)) {
       count++;
     }
@@ -689,7 +688,7 @@
 
   /** Story: Clustering algorithm must support arbitrary user defined distance measure */
   public void testUserDefinedDistanceMeasure() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -699,8 +698,7 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, conf);
     // now run the Canopy Driver. User defined measure happens to be a Manhattan
     // subclass so results are same.
-    CanopyDriver.runJob("testdata", "output/canopies",
-        UserDefinedDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+    CanopyDriver.runJob("testdata", "output/canopies", UserDefinedDistanceMeasure.class.getName(), 3.1, 2.1);
 
     // verify output from sequence file
     JobConf job = new JobConf(CanopyDriver.class);

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java Wed Jan 13 08:01:34 2010
@@ -25,19 +25,20 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 import java.util.ArrayList;
 import java.util.List;
 
 public class TestDirichletClustering extends TestCase {
 
-  private List<Vector> sampleData;
+  private List<VectorWritable> sampleData;
 
   @Override
   protected void setUp() throws Exception {
     super.setUp();
     RandomUtils.useTestSeed();
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
   }
 
   /**
@@ -52,17 +53,17 @@
     System.out.println("Generating " + num + " samples m=[" + mx + ", " + my
         + "] sd=" + sd);
     for (int i = 0; i < num; i++) {
-      sampleData.add(new DenseVector(new double[]{
+      sampleData.add(new VectorWritable(new DenseVector(new double[]{
           UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd)}));
+          UncommonDistributions.rNorm(my, sd)})));
     }
   }
 
-  private static void printResults(List<Model<Vector>[]> result, int significant) {
+  private static void printResults(List<Model<VectorWritable>[]> result, int significant) {
     int row = 0;
-    for (Model<Vector>[] r : result) {
+    for (Model<VectorWritable>[] r : result) {
       System.out.print("sample[" + row++ + "]= ");
-      for (Model<Vector> model : r) {
+      for (Model<VectorWritable> model : r) {
         if (model.count() > significant) {
           System.out.print(model.toString() + ", ");
         }
@@ -78,9 +79,9 @@
     generateSamples(30, 1, 0, 0.1);
     generateSamples(30, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new NormalModelDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 2);
     assertNotNull(result);
   }
@@ -91,9 +92,9 @@
     generateSamples(30, 1, 0, 0.1);
     generateSamples(30, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new SampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 2);
     assertNotNull(result);
   }
@@ -104,9 +105,9 @@
     generateSamples(30, 1, 0, 0.1);
     generateSamples(30, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new AsymmetricSampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 2);
     assertNotNull(result);
   }
@@ -117,9 +118,9 @@
     generateSamples(300, 1, 0, 0.1);
     generateSamples(300, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new NormalModelDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 20);
     assertNotNull(result);
   }
@@ -130,9 +131,9 @@
     generateSamples(300, 1, 0, 0.1);
     generateSamples(300, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new SampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 20);
     assertNotNull(result);
   }
@@ -143,9 +144,9 @@
     generateSamples(300, 1, 0, 0.1);
     generateSamples(300, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new AsymmetricSampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 20);
     assertNotNull(result);
   }
@@ -156,9 +157,9 @@
     generateSamples(3000, 1, 0, 0.1);
     generateSamples(3000, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new NormalModelDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 200);
     assertNotNull(result);
   }
@@ -169,9 +170,9 @@
     generateSamples(3000, 1, 0, 0.1);
     generateSamples(3000, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new AsymmetricSampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 200);
     assertNotNull(result);
   }
@@ -182,9 +183,9 @@
     generateSamples(3000, 1, 0, 0.1);
     generateSamples(3000, 0, 1, 0.1);
 
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         new SampledNormalDistribution(), 1.0, 10, 1, 0);
-    List<Model<Vector>[]> result = dc.cluster(30);
+    List<Model<VectorWritable>[]> result = dc.cluster(30);
     printResults(result, 200);
     assertNotNull(result);
   }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java Wed Jan 13 08:01:34 2010
@@ -33,10 +33,7 @@
 import org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution;
 import org.apache.mahout.clustering.dirichlet.models.SampledNormalModel;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.JsonVectorAdapter;
-import org.apache.mahout.math.SparseVector;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.*;
 import org.apache.mahout.common.DummyOutputCollector;
 import org.apache.mahout.common.RandomUtils;
 
@@ -48,7 +45,7 @@
 
 public class TestMapReduce extends TestCase {
 
-  private List<Vector> sampleData = new ArrayList<Vector>();
+  private List<VectorWritable> sampleData = new ArrayList<VectorWritable>();
 
   private FileSystem fs;
 
@@ -79,7 +76,7 @@
     for (int j = 0; j < values.length; j++) {
       v.setQuick(j, values[j]);
     }
-    sampleData.add(v);
+    sampleData.add(new VectorWritable(v));
   }
 
   /**
@@ -114,16 +111,16 @@
   /** Test the basic Mapper */
   public void testMapper() throws Exception {
     generateSamples(10, 0, 0, 1);
-    DirichletState<Vector> state = new DirichletState<Vector>(
+    DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(
         new NormalModelDistribution(), 5, 1, 0, 0);
     DirichletMapper mapper = new DirichletMapper();
     mapper.configure(state);
 
-    DummyOutputCollector<Text, Vector> collector = new DummyOutputCollector<Text, Vector>();
-    for (Vector v : sampleData) {
+    DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
+    for (VectorWritable v : sampleData) {
       mapper.map(null, v, collector, null);
     }
-    Map<String, List<Vector>> data = collector.getData();
+    Map<String, List<VectorWritable>> data = collector.getData();
     // this seed happens to produce two partitions, but they work
     assertEquals("output size", 3, data.size());
   }
@@ -134,37 +131,38 @@
     generateSamples(100, 2, 0, 1);
     generateSamples(100, 0, 2, 1);
     generateSamples(100, 2, 2, 1);
-    DirichletState<Vector> state = new DirichletState<Vector>(
+    DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(
         new SampledNormalDistribution(), 20, 1, 1, 0);
     DirichletMapper mapper = new DirichletMapper();
     mapper.configure(state);
 
-    DummyOutputCollector<Text, Vector> mapCollector = new DummyOutputCollector<Text, Vector>();
-    for (Vector v : sampleData) {
+    DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
+    for (VectorWritable v : sampleData) {
       mapper.map(null, v, mapCollector, null);
     }
-    Map<String, List<Vector>> data = mapCollector.getData();
+    Map<String, List<VectorWritable>> data = mapCollector.getData();
     // this seed happens to produce three partitions, but they work
     assertEquals("output size", 7, data.size());
 
     DirichletReducer reducer = new DirichletReducer();
     reducer.configure(state);
-    OutputCollector<Text, DirichletCluster<Vector>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<Vector>>();
+    OutputCollector<Text, DirichletCluster<VectorWritable>> reduceCollector =
+        new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
     for (String key : mapCollector.getKeys()) {
       reducer.reduce(new Text(key), mapCollector.getValue(key).iterator(),
           reduceCollector, null);
     }
 
-    Model<Vector>[] newModels = reducer.getNewModels();
+    Model<VectorWritable>[] newModels = reducer.getNewModels();
     state.update(newModels);
   }
 
-  private static void printModels(Iterable<Model<Vector>[]> results, int significant) {
+  private static void printModels(Iterable<Model<VectorWritable>[]> results, int significant) {
     int row = 0;
-    for (Model<Vector>[] r : results) {
+    for (Model<VectorWritable>[] r : results) {
       System.out.print("sample[" + row++ + "]= ");
       for (int k = 0; k < r.length; k++) {
-        Model<Vector> model = r[k];
+        Model<VectorWritable> model = r[k];
         if (model.count() > significant) {
           System.out.print("m" + k + model.toString() + ", ");
         }
@@ -180,28 +178,29 @@
     generateSamples(100, 2, 0, 1);
     generateSamples(100, 0, 2, 1);
     generateSamples(100, 2, 2, 1);
-    DirichletState<Vector> state = new DirichletState<Vector>(
+    DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(
         new SampledNormalDistribution(), 20, 1.0, 1, 0);
 
-    List<Model<Vector>[]> models = new ArrayList<Model<Vector>[]>();
+    List<Model<VectorWritable>[]> models = new ArrayList<Model<VectorWritable>[]>();
 
     for (int iteration = 0; iteration < 10; iteration++) {
       DirichletMapper mapper = new DirichletMapper();
       mapper.configure(state);
-      DummyOutputCollector<Text, Vector> mapCollector = new DummyOutputCollector<Text, Vector>();
-      for (Vector v : sampleData) {
+      DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
+      for (VectorWritable v : sampleData) {
         mapper.map(null, v, mapCollector, null);
       }
 
       DirichletReducer reducer = new DirichletReducer();
       reducer.configure(state);
-      OutputCollector<Text,DirichletCluster<Vector>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<Vector>>();
+      OutputCollector<Text,DirichletCluster<VectorWritable>> reduceCollector =
+          new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
       for (String key : mapCollector.getKeys()) {
         reducer.reduce(new Text(key), mapCollector.getValue(key).iterator(),
             reduceCollector, null);
       }
 
-      Model<Vector>[] newModels = reducer.getNewModels();
+      Model<VectorWritable>[] newModels = reducer.getNewModels();
       state.update(newModels);
       models.add(newModels);
     }
@@ -247,12 +246,12 @@
 
   public void testSampledNormalDistributionSerialization() {
     SampledNormalDistribution dist = new SampledNormalDistribution();
-    Model<Vector>[] models = dist.sampleFromPrior(20);
+    Model<VectorWritable>[] models = dist.sampleFromPrior(20);
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
     Gson gson = builder.create();
     String jsonString = gson.toJson(models);
-    Model<Vector>[] models2 = gson.fromJson(jsonString, SampledNormalModel[].class);
+    Model<VectorWritable>[] models2 = gson.fromJson(jsonString, SampledNormalModel[].class);
     assertEquals("models", models.length, models2.length);
     for (int i = 0; i < models.length; i++) {
       assertEquals("model[" + i + ']', models[i].toString(), models2[i]
@@ -276,12 +275,12 @@
 
   public void testAsymmetricSampledNormalDistributionSerialization() {
     AsymmetricSampledNormalDistribution dist = new AsymmetricSampledNormalDistribution();
-    Model<Vector>[] models = dist.sampleFromPrior(20);
+    Model<VectorWritable>[] models = dist.sampleFromPrior(20);
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
     Gson gson = builder.create();
     String jsonString = gson.toJson(models);
-    Model<Vector>[] models2 = gson.fromJson(jsonString,
+    Model<VectorWritable>[] models2 = gson.fromJson(jsonString,
         AsymmetricSampledNormalModel[].class);
     assertEquals("models", models.length, models2.length);
     for (int i = 0; i < models.length; i++) {
@@ -297,7 +296,7 @@
         .registerTypeAdapter(ModelHolder.class, new JsonModelHolderAdapter());
     Gson gson = builder.create();
     double[] d = {1.1, 2.2};
-    ModelHolder<Vector> mh = new ModelHolder<Vector>(new NormalModel(new DenseVector(d), 3.3));
+    ModelHolder<VectorWritable> mh = new ModelHolder<VectorWritable>(new NormalModel(new DenseVector(d), 3.3));
     String format = gson.toJson(mh);
     ModelHolder<Vector> mh2 = gson.<ModelHolder<Vector>>fromJson(format, ModelHolder.class);
     assertEquals("mh", mh.getModel().toString(), mh2.getModel().toString());
@@ -311,7 +310,7 @@
     Gson gson = builder.create();
     double[] d = {1.1, 2.2};
     double[] s = {3.3, 4.4};
-    ModelHolder<Vector> mh = new ModelHolder<Vector>(new AsymmetricSampledNormalModel(
+    ModelHolder<VectorWritable> mh = new ModelHolder<VectorWritable>(new AsymmetricSampledNormalModel(
         new DenseVector(d), new DenseVector(s)));
     String format = gson.toJson(mh);
     ModelHolder<Vector> mh2 = gson.<ModelHolder<Vector>>fromJson(format, ModelHolder.class);
@@ -323,7 +322,7 @@
     builder.registerTypeAdapter(DirichletState.class,
         new JsonDirichletStateAdapter());
     Gson gson = builder.create();
-    DirichletState<Vector> state = new DirichletState<Vector>(new SampledNormalDistribution(),
+    DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(),
         20, 1, 1, 0);
     String format = gson.toJson(state);
     DirichletState<?> state2 = gson.fromJson(format, DirichletState.class);
@@ -346,20 +345,17 @@
     generateSamples(100, 2, 0, 0.2);
     generateSamples(100, 0, 2, 0.3);
     generateSamples(100, 2, 2, 1);
-    ClusteringTestUtils.writePointsToFile(sampleData, "input/data.txt", fs,
-        conf);
+    ClusteringTestUtils.writePointsToFile(sampleData, "input/data.txt", fs, conf);
     // Now run the driver
-    DirichletDriver
-        .runJob(
+    DirichletDriver.runJob(
             "input",
             "output",
             "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
             20, 10, 1.0, 1);
     // and inspect results
-    List<List<DirichletCluster<Vector>>> clusters = new ArrayList<List<DirichletCluster<Vector>>>();
+    List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
     JobConf conf = new JobConf(KMeansDriver.class);
-    conf
-        .set(DirichletDriver.MODEL_FACTORY_KEY,
+    conf.set(DirichletDriver.MODEL_FACTORY_KEY,
             "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
     conf.set(DirichletDriver.NUM_CLUSTERS_KEY, Integer.toString(20));
     conf.set(DirichletDriver.ALPHA_0_KEY, Double.toString(1.0));
@@ -371,12 +367,12 @@
   }
 
   private static void printResults(
-      List<List<DirichletCluster<Vector>>> clusters, int significant) {
+      List<List<DirichletCluster<VectorWritable>>> clusters, int significant) {
     int row = 0;
-    for (List<DirichletCluster<Vector>> r : clusters) {
+    for (List<DirichletCluster<VectorWritable>> r : clusters) {
       System.out.print("sample[" + row++ + "]= ");
       for (int k = 0; k < r.size(); k++) {
-        Model<Vector> model = r.get(k).getModel();
+        Model<VectorWritable> model = r.get(k).getModel();
         if (model.count() > significant) {
           int total = (int) r.get(k).getTotalCount();
           System.out.print("m" + k + '(' + total + ')' + model.toString()
@@ -403,7 +399,7 @@
             "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
             20, 15, 1.0, 1);
     // and inspect results
-    List<List<DirichletCluster<Vector>>> clusters = new ArrayList<List<DirichletCluster<Vector>>>();
+    List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf
         .set(DirichletDriver.MODEL_FACTORY_KEY,
@@ -421,15 +417,15 @@
     generateSamples(500, 0, 0, 0.5);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data1.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 2, 0, 0.2);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data2.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 0, 2, 0.3);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data3.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 2, 2, 1);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data4.txt", fs,
         conf);
@@ -450,7 +446,7 @@
             "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution",
             20, 15, 1.0, 2);
     // and inspect results
-    List<List<DirichletCluster<Vector>>> clusters = new ArrayList<List<DirichletCluster<Vector>>>();
+    List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf
         .set(DirichletDriver.MODEL_FACTORY_KEY,
@@ -473,15 +469,15 @@
     generateSamples(500, 0, 0, 0.5, 1.0);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data1.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 2, 0, 0.2);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data2.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 0, 2, 0.3);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data3.txt", fs,
         conf);
-    sampleData = new ArrayList<Vector>();
+    sampleData = new ArrayList<VectorWritable>();
     generateSamples(500, 2, 2, 1);
     ClusteringTestUtils.writePointsToFile(sampleData, "input/data4.txt", fs,
         conf);
@@ -493,7 +489,7 @@
             "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution",
             20, 15, 1.0, 2);
     // and inspect results
-    List<List<DirichletCluster<Vector>>> clusters = new ArrayList<List<DirichletCluster<Vector>>>();
+    List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf
         .set(

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Wed Jan 13 08:01:34 2010
@@ -31,6 +31,7 @@
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.DummyOutputCollector;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.math.VectorWritable;
 
 import java.io.File;
 import java.util.ArrayList;
@@ -84,7 +85,7 @@
 
   }
 
-  public static void referenceFuzzyKMeans(List<Vector> points,
+  public static void referenceFuzzyKMeans(List<VectorWritable> points,
                                           List<SoftCluster> clusterList, Map<String, String> pointClusterInfo,
                                           String distanceMeasureClass, double threshold, double m, int numIter)
       throws Exception {
@@ -100,10 +101,11 @@
     computeCluster(points, clusterList, clusterer, pointClusterInfo);
   }
 
-  public static boolean iterateReference(List<Vector> points,
+  public static boolean iterateReference(List<VectorWritable> points,
                                          List<SoftCluster> clusterList, FuzzyKMeansClusterer clusterer) {
     // for each
-    for (Vector point : points) {
+    for (VectorWritable pointWritable : points) {
+      Vector point = pointWritable.get();
       List<Double> clusterDistanceList = new ArrayList<Double>();
       for (SoftCluster cluster : clusterList) {
         clusterDistanceList.add(clusterer.getMeasure().distance(point, cluster.getCenter()));
@@ -132,11 +134,12 @@
 
   }
 
-  public static void computeCluster(List<Vector> points,
+  public static void computeCluster(List<VectorWritable> points,
                                     List<SoftCluster> clusterList, FuzzyKMeansClusterer clusterer,
                                     Map<String, String> pointClusterInfo) {
 
-    for (Vector point : points) {
+    for (VectorWritable pointWritable : points) {
+      Vector point = pointWritable.get();
       StringBuilder outputValue = new StringBuilder("[");
       List<Double> clusterDistanceList = new ArrayList<Double>();
       for (SoftCluster cluster : clusterList) {
@@ -156,15 +159,14 @@
   }
 
   public void testReferenceImplementation() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
     for (int k = 0; k < points.size(); k++) {
       System.out.println("test k= " + k);
 
       List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
       // pick k initial cluster centers at random
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
         SoftCluster cluster = new SoftCluster(vec);
         // add the center so the centroid will be correct upon output
         cluster.addPoint(cluster.getCenter(), 1);
@@ -197,8 +199,7 @@
   }
 
   public void testFuzzyKMeansMRJob() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -233,7 +234,7 @@
       SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path("testdata/clusters/part-00000"),
           Text.class, SoftCluster.class);
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
 
         SoftCluster cluster = new SoftCluster(vec);
         // add the center so the centroid will be correct upon output
@@ -254,7 +255,7 @@
       // now run the Job      
       FuzzyKMeansDriver.runJob("testdata/points", "testdata/clusters",
           "output", EuclideanDistanceMeasure.class.getName(), 0.001, 2, 1,
-          k + 1, 2, SparseVector.class);
+          k + 1, 2);
 
       // now compare the expected clusters with actual
       File outDir = new File("output/points");
@@ -292,8 +293,7 @@
   }
 
   public void testFuzzyKMeansMapper() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
 
     for (int k = 0; k < points.size(); k++) {
       System.out.println("testKFuzzyKMeansMRJob k= " + k);
@@ -301,7 +301,7 @@
       List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
 
         SoftCluster cluster = new SoftCluster(vec, i);
         cluster.addPoint(cluster.getCenter(), 1);
@@ -319,7 +319,7 @@
       mapper.configure(conf);
       
       DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         mapper.map(new Text(), point, mapCollector, null);
       }
 
@@ -354,8 +354,7 @@
   }
 
   public void testFuzzyKMeansCombiner() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
 
     for (int k = 0; k < points.size(); k++) {
       System.out.println("testKFuzzyKMeansMRJob k= " + k);
@@ -363,7 +362,7 @@
       List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
 
         SoftCluster cluster = new SoftCluster(vec, i);
         cluster.addPoint(cluster.getCenter(), 1);
@@ -381,9 +380,8 @@
       mapper.configure(conf);
       
       DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
-      for (Vector point : points) {
-        mapper.map(new Text(), point, mapCollector,
-            null);
+      for (VectorWritable point : points) {
+        mapper.map(new Text(), point, mapCollector, null);
       }
 
       // run combiner
@@ -409,8 +407,7 @@
   }
 
   public void testFuzzyKMeansReducer() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
 
     for (int k = 0; k < points.size(); k++) {
       System.out.println("testKFuzzyKMeansMRJob k= " + k);
@@ -418,7 +415,7 @@
       List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
 
         SoftCluster cluster = new SoftCluster(vec, i);
         // cluster.addPoint(cluster.getCenter(), 1);
@@ -436,7 +433,7 @@
       mapper.configure(conf);
       
       DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         mapper.map(new Text(), point, mapCollector,
             null);
       }
@@ -470,7 +467,7 @@
       // compute the reference result after one iteration and compare
       List<SoftCluster> reference = new ArrayList<SoftCluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
         reference.add(new SoftCluster(vec, i));
       }
       
@@ -492,8 +489,7 @@
   }
 
   public void testFuzzyKMeansClusterMapper() throws Exception {
-    List<Vector> points = TestKmeansClustering
-        .getPoints(TestKmeansClustering.reference);
+    List<VectorWritable> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
 
     for (int k = 0; k < points.size(); k++) {
       System.out.println("testKFuzzyKMeansMRJob k= " + k);
@@ -501,7 +497,7 @@
       List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
 
         SoftCluster cluster = new SoftCluster(vec, i);
         cluster.addPoint(cluster.getCenter(), 1);
@@ -519,9 +515,8 @@
       mapper.configure(conf);
       
       DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
-      for (Vector point : points) {
-        mapper.map(new Text(), point, mapCollector,
-            null);
+      for (VectorWritable point : points) {
+        mapper.map(new Text(), point, mapCollector, null);
       }
       for (SoftCluster softCluster : clusterList) {
         softCluster.recomputeCenter();
@@ -568,7 +563,7 @@
       clusterMapper.config(reducerCluster);
       clusterMapper.configure(conf);
       
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         clusterMapper.map(new Text(), point, clusterMapperCollector, null);
       }
 
@@ -577,7 +572,7 @@
       // compute the reference result after one iteration and compare
       List<SoftCluster> reference = new ArrayList<SoftCluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = tweakValue(points.get(i));
+        Vector vec = tweakValue(points.get(i).get());
         reference.add(new SoftCluster(vec, i));
       }
       Map<String, String> pointClusterInfo = new HashMap<String, String>();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Wed Jan 13 08:01:34 2010
@@ -26,10 +26,7 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.math.AbstractVector;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.SparseVector;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.*;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.DummyOutputCollector;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -87,7 +84,7 @@
    * @param maxIter  the maximum number of iterations
    * @param convergenceDelta threshold until cluster is considered stable
    */
-  private static void referenceKmeans(List<Vector> points, List<Cluster> clusters,
+  private static void referenceKmeans(List<VectorWritable> points, List<Cluster> clusters,
                                DistanceMeasure measure, int maxIter, double convergenceDelta) {
     boolean converged = false;
     int iteration = 0;
@@ -105,10 +102,11 @@
    * @param measure  a DistanceMeasure to use
    * @param convergenceDelta threshold until cluster is considered stable
    */
-  private static boolean iterateReference(List<Vector> points, List<Cluster> clusters,
+  private static boolean iterateReference(List<VectorWritable> points, List<Cluster> clusters,
                                    DistanceMeasure measure, double convergenceDelta) {
     // iterate through all points, assigning each to the nearest cluster
-    for (Vector point : points) {
+    for (VectorWritable pointWritable : points) {
+      Vector point = pointWritable.get();
       Cluster closestCluster = null;
       double closestDistance = Double.MAX_VALUE;
       for (Cluster cluster : clusters) {
@@ -136,20 +134,20 @@
     return converged;
   }
 
-  public static List<Vector> getPoints(double[][] raw) {
-    List<Vector> points = new ArrayList<Vector>();
+  public static List<VectorWritable> getPoints(double[][] raw) {
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     for (int i = 0; i < raw.length; i++) {
       double[] fr = raw[i];
       Vector vec = new SparseVector(String.valueOf(i), fr.length);
       vec.assign(fr);
-      points.add(vec);
+      points.add(new VectorWritable(vec));
     }
     return points;
   }
 
   /** Story: Test the reference implementation */
   public void testReferenceImplementation() throws Exception {
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     // try all possible values of k
     for (int k = 0; k < points.size(); k++) {
@@ -157,7 +155,7 @@
       // pick k initial cluster centers at random
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = points.get(i);
+        Vector vec = points.get(i).get();
         clusters.add(new VisibleCluster(vec));
       }
       // iterate clusters until they converge
@@ -189,14 +187,14 @@
     conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
     mapper.configure(conf);
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     for (int k = 0; k < points.size(); k++) {
       // pick k initial cluster centers at random
       DummyOutputCollector<Text, KMeansInfo> collector = new DummyOutputCollector<Text, KMeansInfo>();
       List<Cluster> clusters = new ArrayList<Cluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Cluster cluster = new Cluster(points.get(i), i);
+        Cluster cluster = new Cluster(points.get(i).get(), i);
         // add the center so the centroid will be correct upon output
         cluster.addPoint(cluster.getCenter());
         clusters.add(cluster);
@@ -204,7 +202,7 @@
       mapper.config(clusters);
 
       // map the data
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         mapper.map(new Text(), point, collector, null);
       }
       assertEquals("Number of map results", k + 1, collector.getData().size());
@@ -234,13 +232,13 @@
     conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
     mapper.configure(conf);
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     for (int k = 0; k < points.size(); k++) {
       // pick k initial cluster centers at random
       DummyOutputCollector<Text, KMeansInfo> collector = new DummyOutputCollector<Text, KMeansInfo>();
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = points.get(i);
+        Vector vec = points.get(i).get();
 
         Cluster cluster = new Cluster(vec, i);
         // add the center so the centroid will be correct upon output
@@ -249,7 +247,7 @@
       }
       mapper.config(clusters);
       // map the data
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         mapper.map(new Text(), point, collector,
             null);
       }
@@ -289,14 +287,14 @@
     conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
     mapper.configure(conf);
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     for (int k = 0; k < points.size(); k++) {
       System.out.println("K = " + k);
       // pick k initial cluster centers at random
       DummyOutputCollector<Text, KMeansInfo> collector = new DummyOutputCollector<Text, KMeansInfo>();
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = points.get(i);
+        Vector vec = points.get(i).get();
         Cluster cluster = new Cluster(vec, i);
         // add the center so the centroid will be correct upon output
         // cluster.addPoint(cluster.getCenter());
@@ -304,7 +302,7 @@
       }
       mapper.config(clusters);
       // map the data
-      for (Vector point : points) {
+      for (VectorWritable point : points) {
         mapper.map(new Text(), point, collector,
             null);
       }
@@ -331,7 +329,7 @@
       // compute the reference result after one iteration and compare
       List<Cluster> reference = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = points.get(i);
+        Vector vec = points.get(i).get();
         reference.add(new Cluster(vec, i));
       }
       boolean converged = iterateReference(points, reference,
@@ -368,7 +366,7 @@
 
   /** Story: User wishes to run kmeans job on reference data */
   public void testKMeansMRJob() throws Exception {
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -391,7 +389,7 @@
           Text.class, Cluster.class);
 
       for (int i = 0; i < k + 1; i++) {
-        Vector vec = points.get(i);
+        Vector vec = points.get(i).get();
 
         Cluster cluster = new Cluster(vec, i);
         // add the center so the centroid will be correct upon output
@@ -401,8 +399,13 @@
       writer.close();
       // now run the Job
       HadoopUtil.overwriteOutput("output");
-      KMeansDriver.runJob("testdata/points", "testdata/clusters", "output",
-          EuclideanDistanceMeasure.class.getName(), 0.001, 10, k + 1, SparseVector.class);
+      KMeansDriver.runJob("testdata/points",
+          "testdata/clusters",
+          "output",
+          EuclideanDistanceMeasure.class.getName(),
+          0.001,
+          10,
+          k + 1);
       // now compare the expected clusters with actual
       File outDir = new File("output/points");
       assertTrue("output dir exists?", outDir.exists());
@@ -440,7 +443,7 @@
 
   /** Story: User wants to use canopy clustering to input the initial clusters for kmeans job. */
   public void textKMeansWithCanopyClusterInput() throws Exception {
-    List<Vector> points = getPoints(reference);
+    List<VectorWritable> points = getPoints(reference);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();
@@ -454,12 +457,16 @@
     ClusteringTestUtils.writePointsToFile(points, "testdata/points/file2", fs, conf);
 
     // now run the Canopy job
-    CanopyDriver.runJob("testdata/points", "testdata/canopies",
-        ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, SparseVector.class);
+    CanopyDriver.runJob("testdata/points", "testdata/canopies", ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
 
     // now run the KMeans job
-    KMeansDriver.runJob("testdata/points", "testdata/canopies", "output",
-        EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, SparseVector.class);
+    KMeansDriver.runJob("testdata/points", 
+        "testdata/canopies",
+        "output",
+        EuclideanDistanceMeasure.class.getName(),
+        0.001,
+        10,
+        1);
 
     // now compare the expected clusters with actual
     File outDir = new File("output/points");

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java Wed Jan 13 08:01:34 2010
@@ -35,6 +35,7 @@
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.math.SparseVector;
 import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
 
 public class TestRandomSeedGenerator extends TestCase {
   
@@ -43,13 +44,13 @@
   
   private FileSystem fs;
   
-  private static List<Vector> getPoints(double[][] raw) {
-    List<Vector> points = new ArrayList<Vector>();
+  private static List<VectorWritable> getPoints(double[][] raw) {
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     int i = 0;
     for (double[] fr : raw) {
       Vector vec = new SparseVector(String.valueOf(i++), fr.length);
       vec.assign(fr);
-      points.add(vec);
+      points.add(new VectorWritable(vec));
     }
     return points;
   }
@@ -77,7 +78,7 @@
   
   /** Story: test random seed generation generates 4 clusters with proper ids and data */
   public void testRandomSeedGenerator() throws Exception {
-    List<Vector> points = getPoints(raw);
+    List<VectorWritable> points = getPoints(raw);
     File testData = new File("testdata");
     if (!testData.exists()) {
       testData.mkdir();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java Wed Jan 13 08:01:34 2010
@@ -30,6 +30,7 @@
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayCanopy extends DisplayDirichlet {
   DisplayCanopy() {
@@ -73,7 +74,7 @@
    * @return the List<Canopy> created
    */
   static List<Canopy> populateCanopies(DistanceMeasure measure,
-      List<Vector> points, double t1, double t2) {
+      List<VectorWritable> points, double t1, double t2) {
     List<Canopy> canopies = new ArrayList<Canopy>();
     /**
      * Reference Implementation: Given a distance metric, one can create
@@ -87,13 +88,13 @@
      */
     int nextCanopyId = 0;
     while (!points.isEmpty()) {
-      Iterator<Vector> ptIter = points.iterator();
-      Vector p1 = ptIter.next();
+      Iterator<VectorWritable> ptIter = points.iterator();
+      Vector p1 = ptIter.next().get();
       ptIter.remove();
       Canopy canopy = new Canopy(p1, nextCanopyId++);
       canopies.add(canopy);
       while (ptIter.hasNext()) {
-        Vector p2 = ptIter.next();
+        Vector p2 = ptIter.next().get();
         double dist = measure.distance(p1, p2);
         // Put all points that are within distance threshold T1 into the canopy
         if (dist < t1)
@@ -109,7 +110,7 @@
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
     generateSamples();
-    List<Vector> points = new ArrayList<Vector>();
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     points.addAll(sampleData);
     canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
     new DisplayCanopy();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Wed Jan 13 08:01:34 2010
@@ -27,12 +27,12 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class Display2dASNDirichlet extends DisplayDirichlet {
   Display2dASNDirichlet() {
     initialize();
-    this
-        .setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>"
+    this.setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>"
             + (int) (significance * 100) + "% of population)");
   }
 
@@ -43,10 +43,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.assign(mm.getStdDev().times(3));
         if (isSignificant(mm))

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java Wed Jan 13 08:01:34 2010
@@ -27,6 +27,7 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayASNDirichlet extends DisplayDirichlet {
   DisplayASNDirichlet() {
@@ -43,10 +44,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.assign(mm.getStdDev().times(3));
         if (isSignificant(mm))

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java Wed Jan 13 08:01:34 2010
@@ -35,6 +35,7 @@
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.FileLineIterable;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayASNOutputState extends DisplayDirichlet {
   DisplayASNOutputState() {
@@ -50,10 +51,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.set(0, mm.getStdDev().get(0) * 3);
         dv.set(1, mm.getStdDev().get(1) * 3);
@@ -72,10 +73,10 @@
    * @throws IOException
    *             if there is an error
    */
-  public static List<Vector> readFile(String fileName) throws IOException {
-    List<Vector> results = new ArrayList<Vector>();
+  public static List<VectorWritable> readFile(String fileName) throws IOException {
+    List<VectorWritable> results = new ArrayList<VectorWritable>();
     for (String line : new FileLineIterable(new File(fileName))) {
-      results.add(AbstractVector.decodeVector(line));
+      results.add(new VectorWritable(AbstractVector.decodeVector(line)));
     }
     return results;
   }
@@ -87,7 +88,7 @@
   }
 
   private static void getResults() throws IOException {
-    result = new ArrayList<Model<Vector>[]>();
+    result = new ArrayList<Model<VectorWritable>[]>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf
         .set(DirichletDriver.MODEL_FACTORY_KEY,
@@ -97,8 +98,7 @@
     File f = new File("output");
     for (File g : f.listFiles()) {
       conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
-      DirichletState<Vector> dirichletState = DirichletMapper
-          .getDirichletState(conf);
+      DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
       result.add(dirichletState.getModels());
     }
   }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java Wed Jan 13 08:01:34 2010
@@ -37,6 +37,7 @@
 import org.apache.mahout.math.TimesFunction;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 public class DisplayDirichlet extends Frame {
 
@@ -46,9 +47,9 @@
 
   protected static final int size = 8; // screen size in inches
 
-  protected static final List<Vector> sampleData = new ArrayList<Vector>();
+  protected static final List<VectorWritable> sampleData = new ArrayList<VectorWritable>();
 
-  protected static List<Model<Vector>[]> result;
+  protected static List<Model<VectorWritable>[]> result;
 
   protected static final double significance = 0.05;
 
@@ -133,8 +134,8 @@
     // plot the sample data
     g2.setColor(Color.DARK_GRAY);
     dv.assign(0.03);
-    for (Vector v : sampleData)
-      plotRectangle(g2, v, dv);
+    for (VectorWritable v : sampleData)
+      plotRectangle(g2, v.get(), dv);
   }
 
   /**
@@ -172,12 +173,12 @@
             * ds));
   }
 
-  private static void printModels(List<Model<Vector>[]> results, int significant) {
+  private static void printModels(List<Model<VectorWritable>[]> results, int significant) {
     int row = 0;
-    for (Model<Vector>[] r : results) {
+    for (Model<VectorWritable>[] r : results) {
       System.out.print("sample[" + row++ + "]= ");
       for (int k = 0; k < r.length; k++) {
-        Model<Vector> model = r[k];
+        Model<VectorWritable> model = r[k];
         if (model.count() > significant) {
           System.out.print("m" + k + model.toString() + ", ");
         }
@@ -212,9 +213,9 @@
     System.out.println("Generating " + num + " samples m=[" + mx + ", " + my
         + "] sd=" + sd);
     for (int i = 0; i < num; i++)
-      sampleData.add(new DenseVector(new double[] {
+      sampleData.add(new VectorWritable(new DenseVector(new double[] {
           UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd) }));
+          UncommonDistributions.rNorm(my, sd) })));
   }
 
   /**
@@ -232,19 +233,19 @@
     System.out.println("Generating " + num + " samples m=[" + mx + ", " + my
         + "] sd=[" + sdx + ", " + sdy + ']');
     for (int i = 0; i < num; i++)
-      sampleData.add(new DenseVector(new double[] {
+      sampleData.add(new VectorWritable(new DenseVector(new double[] {
           UncommonDistributions.rNorm(mx, sdx),
-          UncommonDistributions.rNorm(my, sdy) }));
+          UncommonDistributions.rNorm(my, sdy) })));
   }
 
-  public static void generateResults(ModelDistribution<Vector> modelDist) {
-    DirichletClusterer<Vector> dc = new DirichletClusterer<Vector>(sampleData,
+  public static void generateResults(ModelDistribution<VectorWritable> modelDist) {
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData,
         modelDist, 1.0, 10, 2, 2);
     result = dc.cluster(20);
     printModels(result, 5);
   }
 
-  public static boolean isSignificant(Model<Vector> model) {
+  public static boolean isSignificant(Model<VectorWritable> model) {
     return (((double) model.count() / sampleData.size()) > significance);
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java Wed Jan 13 08:01:34 2010
@@ -27,6 +27,7 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayNDirichlet extends DisplayDirichlet {
   DisplayNDirichlet() {
@@ -42,10 +43,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
         if (isSignificant(mm))

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java Wed Jan 13 08:01:34 2010
@@ -35,6 +35,7 @@
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.FileLineIterable;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayOutputState extends DisplayDirichlet {
   DisplayOutputState() {
@@ -50,10 +51,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
         if (isSignificant(mm))
@@ -71,10 +72,10 @@
    * @throws IOException
    *             if there is an error
    */
-  public static List<Vector> readFile(String fileName) throws IOException {
-    List<Vector> results = new ArrayList<Vector>();
+  public static List<VectorWritable> readFile(String fileName) throws IOException {
+    List<VectorWritable> results = new ArrayList<VectorWritable>();
     for (String line : new FileLineIterable(new File(fileName))) {
-      results.add(AbstractVector.decodeVector(line));
+      results.add(new VectorWritable(AbstractVector.decodeVector(line)));
     }
     return results;
   }
@@ -86,7 +87,7 @@
   }
 
   private static void getResults() throws IOException {
-    result = new ArrayList<Model<Vector>[]>();
+    result = new ArrayList<Model<VectorWritable>[]>();
     JobConf conf = new JobConf(KMeansDriver.class);
     conf.set(DirichletDriver.MODEL_FACTORY_KEY,
         "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
@@ -95,8 +96,7 @@
     File f = new File("output");
     for (File g : f.listFiles()) {
       conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
-      DirichletState<Vector> dirichletState = DirichletMapper
-          .getDirichletState(conf);
+      DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
       result.add(dirichletState.getModels());
     }
   }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java Wed Jan 13 08:01:34 2010
@@ -27,6 +27,7 @@
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplaySNDirichlet extends DisplayDirichlet {
   DisplaySNDirichlet() {
@@ -42,10 +43,10 @@
 
     Vector dv = new DenseVector(2);
     int i = result.size() - 1;
-    for (Model<Vector>[] models : result) {
+    for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Model<Vector> m : models) {
+      for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
         if (isSignificant(mm))

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java Wed Jan 13 08:01:34 2010
@@ -32,6 +32,7 @@
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayFuzzyKMeans extends DisplayDirichlet {
   DisplayFuzzyKMeans() {
@@ -63,7 +64,7 @@
     }
   }
 
-  public static void referenceFuzzyKMeans(List<Vector> points,
+  public static void referenceFuzzyKMeans(List<VectorWritable> points,
       DistanceMeasure measure, double threshold, double m, int numIter) {
     FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, threshold, m);
     boolean converged = false;
@@ -84,22 +85,21 @@
    * 
    * @param points the List<Vector> having the input points
    * @param clusterList the List<Cluster> clusters
-   * @param measure a DistanceMeasure to use
    * @return
    */
-  public static boolean iterateReference(List<Vector> points,
+  public static boolean iterateReference(List<VectorWritable> points,
       List<SoftCluster> clusterList, FuzzyKMeansClusterer clusterer) {
     // for each
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       List<Double> clusterDistanceList = new ArrayList<Double>();
       for (SoftCluster cluster : clusterList) {
-        clusterDistanceList.add(clusterer.getMeasure().distance(point, cluster.getCenter()));
+        clusterDistanceList.add(clusterer.getMeasure().distance(point.get(), cluster.getCenter()));
       }
 
       for (int i = 0; i < clusterList.size(); i++) {
         double probWeight = clusterer.computeProbWeight(clusterDistanceList
             .get(i), clusterDistanceList);
-        clusterList.get(i).addPoint(point,
+        clusterList.get(i).addPoint(point.get(),
             Math.pow(probWeight, clusterer.getM()));
       }
     }
@@ -130,7 +130,7 @@
    * @return the List<Canopy> created
    */
   static List<Canopy> populateCanopies(DistanceMeasure measure,
-      List<Vector> points, double t1, double t2) {
+      List<VectorWritable> points, double t1, double t2) {
     List<Canopy> canopies = new ArrayList<Canopy>();
     /**
      * Reference Implementation: Given a distance metric, one can create
@@ -144,13 +144,13 @@
      */
     int nextCanopyId = 0;
     while (!points.isEmpty()) {
-      Iterator<Vector> ptIter = points.iterator();
-      Vector p1 = ptIter.next();
+      Iterator<VectorWritable> ptIter = points.iterator();
+      Vector p1 = ptIter.next().get();
       ptIter.remove();
       Canopy canopy = new Canopy(p1, nextCanopyId++);
       canopies.add(canopy);
       while (ptIter.hasNext()) {
-        Vector p2 = ptIter.next();
+        Vector p2 = ptIter.next().get();
         double dist = measure.distance(p1, p2);
         // Put all points that are within distance threshold T1 into the canopy
         if (dist < t1)
@@ -166,7 +166,7 @@
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
     generateSamples();
-    List<Vector> points = new ArrayList<Vector>();
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     points.addAll(sampleData);
     List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
     DistanceMeasure measure = new ManhattanDistanceMeasure();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=898669&r1=898668&r2=898669&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Wed Jan 13 08:01:34 2010
@@ -31,6 +31,7 @@
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayKMeans extends DisplayDirichlet {
   DisplayKMeans() {
@@ -72,7 +73,7 @@
    * @param measure the DistanceMeasure to use
    * @param maxIter the maximum number of iterations
    */
-  private static void referenceKmeans(List<Vector> points,
+  private static void referenceKmeans(List<VectorWritable> points,
       List<List<Cluster>> clusters, DistanceMeasure measure, int maxIter) {
     boolean converged = false;
     int iteration = 0;
@@ -95,20 +96,20 @@
    * @param measure a DistanceMeasure to use
    * @return
    */
-  private static boolean iterateReference(List<Vector> points,
+  private static boolean iterateReference(List<VectorWritable> points,
       List<Cluster> clusters, DistanceMeasure measure) {
     // iterate through all points, assigning each to the nearest cluster
-    for (Vector point : points) {
+    for (VectorWritable point : points) {
       Cluster closestCluster = null;
       double closestDistance = Double.MAX_VALUE;
       for (Cluster cluster : clusters) {
-        double distance = measure.distance(cluster.getCenter(), point);
+        double distance = measure.distance(cluster.getCenter(), point.get());
         if (closestCluster == null || closestDistance > distance) {
           closestCluster = cluster;
           closestDistance = distance;
         }
       }
-      closestCluster.addPoint(point);
+      closestCluster.addPoint(point.get());
     }
     // test for convergence
     boolean converged = true;
@@ -137,7 +138,7 @@
    * @return the List<Canopy> created
    */
   static List<Canopy> populateCanopies(DistanceMeasure measure,
-      List<Vector> points, double t1, double t2) {
+      List<VectorWritable> points, double t1, double t2) {
     List<Canopy> canopies = new ArrayList<Canopy>();
     /**
      * Reference Implementation: Given a distance metric, one can create
@@ -151,13 +152,13 @@
      */
     int nextCanopyId = 0;
     while (!points.isEmpty()) {
-      Iterator<Vector> ptIter = points.iterator();
-      Vector p1 = ptIter.next();
+      Iterator<VectorWritable> ptIter = points.iterator();
+      Vector p1 = ptIter.next().get();
       ptIter.remove();
       Canopy canopy = new Canopy(p1, nextCanopyId++);
       canopies.add(canopy);
       while (ptIter.hasNext()) {
-        Vector p2 = ptIter.next();
+        Vector p2 = ptIter.next().get();
         double dist = measure.distance(p1, p2);
         // Put all points that are within distance threshold T1 into the canopy
         if (dist < t1)
@@ -173,7 +174,7 @@
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
     generateSamples();
-    List<Vector> points = new ArrayList<Vector>();
+    List<VectorWritable> points = new ArrayList<VectorWritable>();
     points.addAll(sampleData);
     List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
     DistanceMeasure measure = new ManhattanDistanceMeasure();