You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/12/08 19:16:10 UTC

svn commit: r1549087 - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/clustering/classify/ core/src/test/java/org/apache/mahout/clustering/classify/ core/src/test/java/org/apache/mahout/clustering/kmeans/ integration/src/main/java/org/apache/mahout/utils/clustering/ integration/src/main/java/org/apache/mahout/utils/vectors/lucene/

Author: smarthi
Date: Sun Dec  8 18:16:10 2013
New Revision: 1549087

URL: http://svn.apache.org/r1549087
Log:
MAHOUT-1030: Regression: Clustered Points Should be WeightedPropertyVectorWritable not WeightedVectorWritable

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun Dec  8 18:16:10 2013
@@ -66,6 +66,8 @@ Release 0.9 - unreleased
 
   MAHOUT-1242: No key redistribution function for associative maps (Tharindu Rusira via smarthi)
 
+  MAHOUT-1030: Regression: Clustered Points Should be WeightedPropertyVectorWritable not WeightedVectorWritable (Andrew Musselman, Pat Ferrel, Jeff Eastman, Lars Norskog, smarthi)
+
 Release 0.8 - 2013-07-25
 
   MAHOUT-1272: Parallel SGD matrix factorizer for SVDrecommender (Peng Cheng via ssc)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java Sun Dec  8 18:16:10 2013
@@ -20,13 +20,16 @@ package org.apache.mahout.clustering.cla
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -106,7 +109,10 @@ public class ClusterClassificationMapper
     throws IOException, InterruptedException {
     Cluster cluster = clusterModels.get(clusterIndex);
     clusterId.set(cluster.getId());
-    context.write(clusterId, new WeightedVectorWritable(weight, vw.get()));
+    double d = cluster.getCenter().getDistanceSquared(vw.get());
+    Map<Text, Text> props = Maps.newHashMap();
+    props.put(new Text("distance-squared"), new Text(Double.toString(d)));
+    context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props));
   }
   
   public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java Sun Dec  8 18:16:10 2013
@@ -167,7 +167,7 @@ public class ClusterClassificationDriver
       SequenceFile.Reader classifiedVectors = new SequenceFile.Reader(fs,
           partFile.getPath(), conf);
       Writable clusterIdAsKey = new IntWritable();
-      WeightedVectorWritable point = new WeightedVectorWritable();
+      WeightedPropertyVectorWritable point = new WeightedPropertyVectorWritable();
       while (classifiedVectors.next(clusterIdAsKey, point)) {
         collectVector(clusterIdAsKey.toString(), point.getVector());
       }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Sun Dec  8 18:16:10 2013
@@ -29,7 +29,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.DummyOutputCollector;
 import org.apache.mahout.common.MahoutTestCase;
@@ -106,30 +106,30 @@ public final class TestKmeansClustering 
   public void testRunKMeansIterationConvergesInOneRunWithGivenDistanceThreshold() {
     double[][] rawPoints = { {0, 0}, {0, 0.25}, {0, 0.75}, {0, 1}};
     List<Vector> points = getPoints(rawPoints);
-    
+
     ManhattanDistanceMeasure distanceMeasure = new ManhattanDistanceMeasure();
     List<Kluster> clusters = Arrays.asList(new Kluster(points.get(0), 0, distanceMeasure), new Kluster(points.get(3),
         3, distanceMeasure));
-    
+
     // To converge in a single run, the given distance threshold should be
     // greater than or equal to 0.125,
     // since 0.125 will be the distance between center and centroid for the
     // initial two clusters after one run.
     double distanceThreshold = 0.25;
-    
+
     boolean converged = KMeansClusterer.runKMeansIteration(points, clusters, distanceMeasure, distanceThreshold);
-    
+
     Vector cluster1Center = clusters.get(0).getCenter();
     assertEquals(0, cluster1Center.get(0), EPSILON);
     assertEquals(0.125, cluster1Center.get(1), EPSILON);
-    
+
     Vector cluster2Center = clusters.get(1).getCenter();
     assertEquals(0, cluster2Center.get(0), EPSILON);
     assertEquals(0.875, cluster2Center.get(1), EPSILON);
-    
+
     assertTrue("KMeans iteration should be converged after a single run", converged);
   }*/
-  
+
   /** Story: User wishes to run kmeans job on reference data */
   @Test
   public void testKMeansSeqJob() throws Exception {
@@ -161,8 +161,6 @@ public final class TestKmeansClustering 
       }
       // now run the Job
       Path outputPath = getTestTempDirPath("output");
-      // KMeansDriver.runJob(pointsPath, clustersPath, outputPath,
-      // EuclideanDistanceMeasure.class.getName(), 0.001, 10, k + 1, true);
       String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), pointsPath.toString(),
           optKey(DefaultOptionCreator.CLUSTERS_IN_OPTION), clustersPath.toString(),
           optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
@@ -176,9 +174,9 @@ public final class TestKmeansClustering 
       // now compare the expected clusters with actual
       Path clusteredPointsPath = new Path(outputPath, "clusteredPoints");
       int[] expect = EXPECTED_NUM_POINTS[k];
-      DummyOutputCollector<IntWritable,WeightedVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedVectorWritable>();
+      DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable>();
       // The key is the clusterId, the value is the weighted vector
-      for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedVectorWritable>(
+      for (Pair<IntWritable,WeightedPropertyVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedPropertyVectorWritable>(
           new Path(clusteredPointsPath, "part-m-0"), conf)) {
         collector.collect(record.getFirst(), record.getSecond());
       }
@@ -217,8 +215,6 @@ public final class TestKmeansClustering 
       }
       // now run the Job
       Path outputPath = getTestTempDirPath("output");
-      // KMeansDriver.runJob(pointsPath, clustersPath, outputPath,
-      // EuclideanDistanceMeasure.class.getName(), 0.001, 10, k + 1, true);
       String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), pointsPath.toString(),
           optKey(DefaultOptionCreator.CLUSTERS_IN_OPTION), clustersPath.toString(),
           optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
@@ -232,9 +228,9 @@ public final class TestKmeansClustering 
       // now compare the expected clusters with actual
       Path clusteredPointsPath = new Path(outputPath, "clusteredPoints");
       int[] expect = EXPECTED_NUM_POINTS[k];
-      DummyOutputCollector<IntWritable,WeightedVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedVectorWritable>();
+      DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable>();
       // The key is the clusterId, the value is the weighted vector
-      for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedVectorWritable>(
+      for (Pair<IntWritable,WeightedPropertyVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedPropertyVectorWritable>(
           new Path(clusteredPointsPath, "part-m-0"), conf)) {
         collector.collect(record.getFirst(), record.getSecond());
       }
@@ -274,8 +270,6 @@ public final class TestKmeansClustering 
       }
       // now run the Job
       Path outputPath = getTestTempDirPath("output");
-      // KMeansDriver.runJob(pointsPath, clustersPath, outputPath,
-      // EuclideanDistanceMeasure.class.getName(), 0.001, 10, k + 1, true);
       String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), pointsPath.toString(),
           optKey(DefaultOptionCreator.CLUSTERS_IN_OPTION), clustersPath.toString(),
           optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
@@ -289,9 +283,9 @@ public final class TestKmeansClustering 
       Path clusteredPointsPath = new Path(outputPath, "clusteredPoints");
       // assertEquals("output dir files?", 4, outFiles.length);
       int[] expect = EXPECTED_NUM_POINTS[k];
-      DummyOutputCollector<IntWritable,WeightedVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedVectorWritable>();
+      DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable>();
       // The key is the clusterId, the value is the weighted vector
-      for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedVectorWritable>(
+      for (Pair<IntWritable,WeightedPropertyVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedPropertyVectorWritable>(
           new Path(clusteredPointsPath, "part-m-00000"), conf)) {
         collector.collect(record.getFirst(), record.getSecond());
       }
@@ -360,35 +354,31 @@ public final class TestKmeansClustering 
     
     // now compare the expected clusters with actual
     Path clusteredPointsPath = new Path(kmeansOutput, "clusteredPoints");
-    DummyOutputCollector<IntWritable,WeightedVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedVectorWritable>();
+    DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable> collector = new DummyOutputCollector<IntWritable,WeightedPropertyVectorWritable>();
     
     // The key is the clusterId, the value is the weighted vector
-    for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedVectorWritable>(
+    for (Pair<IntWritable,WeightedPropertyVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedPropertyVectorWritable>(
         new Path(clusteredPointsPath, "part-m-00000"), conf)) {
       collector.collect(record.getFirst(), record.getSecond());
     }
     
-    //boolean gotLowClust = false;  // clusters should be [1, *] and [2, *]
-    //boolean gotHighClust = false; // vs [3 , *],  [4 , *] and [5, *]
     for (IntWritable k : collector.getKeys()) {
-      List<WeightedVectorWritable> wvList = collector.getValue(k);
-      assertTrue("empty cluster!", !wvList.isEmpty());
-      if (wvList.get(0).getVector().get(0) <= 2.0) {
-        for (WeightedVectorWritable wv : wvList) {
+      List<WeightedPropertyVectorWritable> wpvList = collector.getValue(k);
+      assertTrue("empty cluster!", !wpvList.isEmpty());
+      if (wpvList.get(0).getVector().get(0) <= 2.0) {
+        for (WeightedPropertyVectorWritable wv : wpvList) {
           Vector v = wv.getVector();
           int idx = v.maxValueIndex();
           assertTrue("bad cluster!", v.get(idx) <= 2.0);
         }
-        assertEquals("Wrong size cluster", 4, wvList.size());
-        //gotLowClust= true;
+        assertEquals("Wrong size cluster", 4, wpvList.size());
       } else {
-        for (WeightedVectorWritable wv : wvList) {
+        for (WeightedPropertyVectorWritable wv : wpvList) {
           Vector v = wv.getVector();
           int idx = v.minValueIndex();
           assertTrue("bad cluster!", v.get(idx) > 2.0);
         }
-        assertEquals("Wrong size cluster", 5, wvList.size());
-        //gotHighClust= true;
+        assertEquals("Wrong size cluster", 5, wpvList.size());
       }
     }
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Sun Dec  8 18:16:10 2013
@@ -27,7 +27,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.lang3.StringUtils;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.distance.DistanceMeasure;
@@ -45,7 +45,7 @@ public abstract class AbstractClusterWri
   private static final Logger log = LoggerFactory.getLogger(AbstractClusterWriter.class);
 
   protected final Writer writer;
-  protected final Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
+  protected final Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints;
   protected final DistanceMeasure measure;
 
   /**
@@ -56,7 +56,7 @@ public abstract class AbstractClusterWri
    * @param measure The {@link org.apache.mahout.common.distance.DistanceMeasure} used to calculate the distance.
    *                Some writers may wish to use it for calculating weights for display.  May be null.
    */
-  protected AbstractClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+  protected AbstractClusterWriter(Writer writer, Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints,
       DistanceMeasure measure) {
     this.writer = writer;
     this.clusterIdToPoints = clusterIdToPoints;
@@ -67,7 +67,7 @@ public abstract class AbstractClusterWri
     return writer;
   }
 
-  protected Map<Integer, List<WeightedVectorWritable>> getClusterIdToPoints() {
+  protected Map<Integer, List<WeightedPropertyVectorWritable>> getClusterIdToPoints() {
     return clusterIdToPoints;
   }
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java Sun Dec  8 18:16:10 2013
@@ -18,7 +18,7 @@
 package org.apache.mahout.utils.clustering;
 
 import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.NamedVector;
@@ -39,7 +39,7 @@ public class CSVClusterWriter extends Ab
 
   private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
 
-  public CSVClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+  public CSVClusterWriter(Writer writer, Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints,
       DistanceMeasure measure) {
     super(writer, clusterIdToPoints, measure);
   }
@@ -49,9 +49,9 @@ public class CSVClusterWriter extends Ab
     StringBuilder line = new StringBuilder();
     Cluster cluster = clusterWritable.getValue();
     line.append(cluster.getId());
-    List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
+    List<WeightedPropertyVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
     if (points != null) {
-      for (WeightedVectorWritable point : points) {
+      for (WeightedPropertyVectorWritable point : points) {
         Vector theVec = point.getVector();
         line.append(',');
         if (theVec instanceof NamedVector) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sun Dec  8 18:16:10 2013
@@ -31,7 +31,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.mahout.clustering.cdbw.CDbwEvaluator;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
 import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
@@ -83,7 +83,7 @@ public final class ClusterDumper extends
   private String dictionaryFormat;
   private int subString = Integer.MAX_VALUE;
   private int numTopFeatures = 10;
-  private Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
+  private Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints;
   private OUTPUT_FORMAT outputFormat = OUTPUT_FORMAT.TEXT;
   private boolean runEvaluation;
 
@@ -275,7 +275,7 @@ public final class ClusterDumper extends
     this.subString = subString;
   }
 
-  public Map<Integer, List<WeightedVectorWritable>> getClusterIdToPoints() {
+  public Map<Integer, List<WeightedPropertyVectorWritable>> getClusterIdToPoints() {
     return clusterIdToPoints;
   }
 
@@ -304,18 +304,17 @@ public final class ClusterDumper extends
     this.maxPointsPerCluster = maxPointsPerCluster;
   }
 
-  public static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, long maxPointsPerCluster,
+  public static Map<Integer, List<WeightedPropertyVectorWritable>> readPoints(Path pointsPathDir, long maxPointsPerCluster,
       Configuration conf) {
-    Map<Integer, List<WeightedVectorWritable>> result = Maps.newTreeMap();
-    for (Pair<IntWritable, WeightedVectorWritable> record
-        : new SequenceFileDirIterable<IntWritable, WeightedVectorWritable>(pointsPathDir, PathType.LIST,
+    Map<Integer, List<WeightedPropertyVectorWritable>> result = Maps.newTreeMap();
+    for (Pair<IntWritable, WeightedPropertyVectorWritable> record
+        : new SequenceFileDirIterable<IntWritable, WeightedPropertyVectorWritable>(pointsPathDir, PathType.LIST,
             PathFilters.logsCRCFilter(), conf)) {
       // value is the cluster id as an int, key is the name/id of the
-      // vector, but that doesn't matter because we only care about printing
-      // it
+      // vector, but that doesn't matter because we only care about printing it
       //String clusterId = value.toString();
       int keyValue = record.getFirst().get();
-      List<WeightedVectorWritable> pointList = result.get(keyValue);
+      List<WeightedPropertyVectorWritable> pointList = result.get(keyValue);
       if (pointList == null) {
         pointList = Lists.newArrayList();
         result.put(keyValue, pointList);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java Sun Dec  8 18:16:10 2013
@@ -21,7 +21,6 @@ import org.apache.hadoop.io.Text;
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 
@@ -40,7 +39,7 @@ public class ClusterDumperWriter extends
   private final String[] dictionary;
   private final int numTopFeatures;
   
-  public ClusterDumperWriter(Writer writer, Map<Integer,List<WeightedVectorWritable>> clusterIdToPoints,
+  public ClusterDumperWriter(Writer writer, Map<Integer,List<WeightedPropertyVectorWritable>> clusterIdToPoints,
       DistanceMeasure measure, int numTopFeatures, String[] dictionary, int subString) {
     super(writer, clusterIdToPoints, measure);
     this.numTopFeatures = numTopFeatures;
@@ -69,27 +68,24 @@ public class ClusterDumperWriter extends
       writer.write('\n');
     }
     
-    Map<Integer,List<WeightedVectorWritable>> clusterIdToPoints = getClusterIdToPoints();
-    List<WeightedVectorWritable> points = clusterIdToPoints.get(clusterWritable.getValue().getId());
+    Map<Integer,List<WeightedPropertyVectorWritable>> clusterIdToPoints = getClusterIdToPoints();
+    List<WeightedPropertyVectorWritable> points = clusterIdToPoints.get(clusterWritable.getValue().getId());
     if (points != null) {
       writer.write("\tWeight : [props - optional]:  Point:\n\t");
-      for (Iterator<WeightedVectorWritable> iterator = points.iterator(); iterator.hasNext();) {
-        WeightedVectorWritable point = iterator.next();
+      for (Iterator<WeightedPropertyVectorWritable> iterator = points.iterator(); iterator.hasNext();) {
+        WeightedPropertyVectorWritable point = iterator.next();
         writer.write(String.valueOf(point.getWeight()));
-        if (point instanceof WeightedPropertyVectorWritable) {
-          WeightedPropertyVectorWritable tmp = (WeightedPropertyVectorWritable) point;
-          Map<Text,Text> map = tmp.getProperties();
-          // map can be null since empty maps when written are returned as null
-          writer.write(" : [");
-          if (map != null) {
-            for (Map.Entry<Text,Text> entry : map.entrySet()) {
-              writer.write(entry.getKey().toString());
-              writer.write("=");
-              writer.write(entry.getValue().toString());
-            }
+        Map<Text,Text> map = point.getProperties();
+        // map can be null since empty maps when written are returned as null
+        writer.write(" : [");
+        if (map != null) {
+          for (Map.Entry<Text,Text> entry : map.entrySet()) {
+            writer.write(entry.getKey().toString());
+            writer.write("=");
+            writer.write(entry.getValue().toString());
           }
-          writer.write("]");
         }
+        writer.write("]");
         
         writer.write(": ");
         

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java Sun Dec  8 18:16:10 2013
@@ -26,6 +26,7 @@ import java.util.Random;
 import java.util.regex.Pattern;
 
 import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.classify.WeightedVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.RandomUtils;
@@ -50,7 +51,7 @@ public class GraphMLClusterWriter extend
   private final int numTopFeatures;
   private final int subString;
 
-  public GraphMLClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+  public GraphMLClusterWriter(Writer writer, Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints,
                               DistanceMeasure measure, int numTopFeatures, String[] dictionary, int subString)
     throws IOException {
     super(writer, clusterIdToPoints, measure);
@@ -115,7 +116,7 @@ public class GraphMLClusterWriter extend
     }
 
     line.append(createNode(clusterLabel, rgb, x, y));
-    List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId());
+    List<WeightedPropertyVectorWritable> points = clusterIdToPoints.get(cluster.getId());
     if (points != null) {
       for (WeightedVectorWritable point : points) {
         Vector theVec = point.getVector();

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java Sun Dec  8 18:16:10 2013
@@ -27,7 +27,7 @@ import java.util.regex.Pattern;
 import com.google.common.collect.Maps;
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.NamedVector;
@@ -53,7 +53,7 @@ public class JsonClusterWriter extends A
   private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
 
   public JsonClusterWriter(Writer writer,
-      Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+      Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints,
       DistanceMeasure measure, int numTopFeatures, String[] dictionary) {
     super(writer, clusterIdToPoints, measure);
     this.numTopFeatures = numTopFeatures;
@@ -136,11 +136,11 @@ public class JsonClusterWriter extends A
    */
   public List<Object> getPoints(Cluster cluster, String[] dictionary) {
     List<Object> vectorObjs = Lists.newLinkedList();
-    List<WeightedVectorWritable> points = getClusterIdToPoints().get(
+    List<WeightedPropertyVectorWritable> points = getClusterIdToPoints().get(
         cluster.getId());
 
     if (points != null) {
-      for (WeightedVectorWritable point : points) {
+      for (WeightedPropertyVectorWritable point : points) {
         Map<String, Object> entry = Maps.newHashMap();
         Vector theVec = point.getVector();
         if (theVec instanceof NamedVector) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1549087&r1=1549086&r2=1549087&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Sun Dec  8 18:16:10 2013
@@ -56,7 +56,7 @@ import org.apache.lucene.store.FSDirecto
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.OpenBitSet;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.NamedVector;
@@ -86,7 +86,7 @@ public class ClusterLabels {
   private final String indexDir;
   private final String contentField;
   private String idField;
-  private final Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
+  private final Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints;
   private String output;
   private final int minNumIds;
   private final int maxLabels;
@@ -114,15 +114,15 @@ public class ClusterLabels {
       writer = Files.newWriter(new File(this.output), Charsets.UTF_8);
     }
     try {
-      for (Map.Entry<Integer, List<WeightedVectorWritable>> integerListEntry : clusterIdToPoints.entrySet()) {
-        List<WeightedVectorWritable> wvws = integerListEntry.getValue();
-        List<TermInfoClusterInOut> termInfos = getClusterLabels(integerListEntry.getKey(), wvws);
+      for (Map.Entry<Integer, List<WeightedPropertyVectorWritable>> integerListEntry : clusterIdToPoints.entrySet()) {
+        List<WeightedPropertyVectorWritable> wpvws = integerListEntry.getValue();
+        List<TermInfoClusterInOut> termInfos = getClusterLabels(integerListEntry.getKey(), wpvws);
         if (termInfos != null) {
           writer.write('\n');
           writer.write("Top labels for Cluster ");
           writer.write(String.valueOf(integerListEntry.getKey()));
           writer.write(" containing ");
-          writer.write(String.valueOf(wvws.size()));
+          writer.write(String.valueOf(wpvws.size()));
           writer.write(" vectors");
           writer.write('\n');
           writer.write("Term \t\t LLR \t\t In-ClusterDF \t\t Out-ClusterDF ");
@@ -148,14 +148,14 @@ public class ClusterLabels {
    * Get the list of labels, sorted by best score.
    */
   protected List<TermInfoClusterInOut> getClusterLabels(Integer integer,
-                                                        Collection<WeightedVectorWritable> wvws) throws IOException {
+                                                        Collection<WeightedPropertyVectorWritable> wpvws) throws IOException {
 
-    if (wvws.size() < minNumIds) {
-      log.info("Skipping small cluster {} with size: {}", integer, wvws.size());
+    if (wpvws.size() < minNumIds) {
+      log.info("Skipping small cluster {} with size: {}", integer, wpvws.size());
       return null;
     }
 
-    log.info("Processing Cluster {} with {} documents", integer, wvws.size());
+    log.info("Processing Cluster {} with {} documents", integer, wpvws.size());
     Directory dir = FSDirectory.open(new File(this.indexDir));
     IndexReader reader = DirectoryReader.open(dir);
     
@@ -163,8 +163,8 @@ public class ClusterLabels {
     log.info("# of documents in the index {}", reader.numDocs());
 
     Collection<String> idSet = Sets.newHashSet();
-    for (WeightedVectorWritable wvw : wvws) {
-      Vector vector = wvw.getVector();
+    for (WeightedPropertyVectorWritable wpvw : wpvws) {
+      Vector vector = wpvw.getVector();
       if (vector instanceof NamedVector) {
         idSet.add(((NamedVector) vector).getName());
       }
@@ -216,7 +216,7 @@ public class ClusterLabels {
 
     List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
 
-    int clusterSize = wvws.size();
+    int clusterSize = wpvws.size();
 
     for (TermEntry termEntry : termEntryMap.values()) {