You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pr...@apache.org on 2012/03/23 18:09:36 UTC

svn commit: r1304490 [2/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/classify/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/ma...

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,16 +17,6 @@
 
 package org.apache.mahout.utils.clustering;
 
-import com.google.common.collect.Lists;
-import org.apache.commons.lang.StringUtils;
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.math.Vector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.io.IOException;
 import java.io.Writer;
 import java.util.Collection;
@@ -37,6 +27,17 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+
 /**
  * Base class for implementing ClusterWriter
  */
@@ -151,7 +152,7 @@ public abstract class AbstractClusterWri
   }
 
   @Override
-  public long write(Iterable<Cluster> iterable) throws IOException {
+  public long write(Iterable<ClusterWritable> iterable) throws IOException {
     return write(iterable, Long.MAX_VALUE);
   }
 
@@ -161,9 +162,9 @@ public abstract class AbstractClusterWri
   }
 
   @Override
-  public long write(Iterable<Cluster> iterable, long maxDocs) throws IOException {
+  public long write(Iterable<ClusterWritable> iterable, long maxDocs) throws IOException {
     long result = 0;
-    Iterator<Cluster> iterator = iterable.iterator();
+    Iterator<ClusterWritable> iterator = iterable.iterator();
     while (result < maxDocs && iterator.hasNext()){
       write(iterator.next());
       result++;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -19,6 +19,7 @@ package org.apache.mahout.utils.clusteri
 
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
@@ -43,9 +44,10 @@ public class CSVClusterWriter extends Ab
   }
 
   @Override
-  public void write(Cluster cluster) throws IOException {
+  public void write(ClusterWritable clusterWritable) throws IOException {
     StringBuilder line = new StringBuilder();
-    line.append(cluster.getId());
+    Cluster cluster = clusterWritable.getValue();
+	line.append(cluster.getId());
     List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
     if (points != null) {
       for (WeightedVectorWritable point : points) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Fri Mar 23 17:09:35 2012
@@ -17,19 +17,24 @@
 
 package org.apache.mahout.utils.clustering;
 
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-import com.google.common.io.Files;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.cdbw.CDbwEvaluator;
 import org.apache.mahout.clustering.classify.WeightedVectorWritable;
 import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
 import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.HadoopUtil;
@@ -44,14 +49,10 @@ import org.apache.mahout.utils.vectors.V
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
+import com.google.common.io.Files;
 
 public final class ClusterDumper extends AbstractJob {
 
@@ -183,7 +184,7 @@ public final class ClusterDumper extends
     }
     ClusterWriter clusterWriter = createClusterWriter(writer, dictionary);
     try {
-      long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<Cluster>(new Path(seqFileDir, "part-*"), PathType.GLOB, conf));
+      long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>(new Path(seqFileDir, "part-*"), PathType.GLOB, conf));
 
       writer.flush();
       if (runEvaluation){

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java Fri Mar 23 17:09:35 2012
@@ -22,6 +22,7 @@ import org.apache.mahout.clustering.Abst
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
 import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 
 import java.io.IOException;
@@ -52,8 +53,9 @@ public class ClusterDumperWriter extends
   }
 
   @Override
-  public void write(Cluster value) throws IOException {
-    String fmtStr = value.asFormatString(dictionary);
+  public void write(ClusterWritable clusterWritable) throws IOException {
+    Cluster cluster = clusterWritable.getValue();
+    String fmtStr = cluster.asFormatString(dictionary);
     Writer writer = getWriter();
     if (subString > 0 && fmtStr.length() > subString) {
       writer.write(':');
@@ -65,14 +67,14 @@ public class ClusterDumperWriter extends
     writer.write('\n');
 
     if (dictionary != null) {
-      String topTerms = getTopFeatures(value.getCenter(), dictionary, numTopFeatures);
+      String topTerms = getTopFeatures(clusterWritable.getValue().getCenter(), dictionary, numTopFeatures);
       writer.write("\tTop Terms: ");
       writer.write(topTerms);
       writer.write('\n');
     }
 
     Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints = getClusterIdToPoints();
-    List<WeightedVectorWritable> points = clusterIdToPoints.get(value.getId());
+    List<WeightedVectorWritable> points = clusterIdToPoints.get(clusterWritable.getValue().getId());
     if (points != null) {
       writer.write("\tWeight : [props - optional]:  Point:\n\t");
       for (Iterator<WeightedVectorWritable> iterator = points.iterator(); iterator.hasNext(); ) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,11 +17,11 @@
 
 package org.apache.mahout.utils.clustering;
 
-import org.apache.mahout.clustering.Cluster;
-
 import java.io.Closeable;
 import java.io.IOException;
 
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+
 /**
  * Writes out clusters
  */
@@ -33,7 +33,7 @@ public interface ClusterWriter extends C
    * @return the number of docs written
    * @throws java.io.IOException if there was a problem writing
    */
-  long write(Iterable<Cluster> iterable) throws IOException;
+  long write(Iterable<ClusterWritable> iterable) throws IOException;
 
   /**
    * Write out a Cluster
@@ -41,7 +41,7 @@ public interface ClusterWriter extends C
    * @param cluster The {@link org.apache.mahout.clustering.Cluster} to write
    * @throws IOException
    */
-  void write(Cluster cluster) throws IOException;
+  void write(ClusterWritable clusterWritable) throws IOException;
 
   /**
    * Write the first {@code maxDocs} to the output.
@@ -51,5 +51,5 @@ public interface ClusterWriter extends C
    * @return The number of docs written
    * @throws IOException if there was a problem writing
    */
-  long write(Iterable<Cluster> iterable, long maxDocs) throws IOException;
+  long write(Iterable<ClusterWritable> iterable, long maxDocs) throws IOException;
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,13 +17,6 @@
 
 package org.apache.mahout.utils.clustering;
 
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
-import org.apache.mahout.common.StringUtils;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.math.NamedVector;
-import org.apache.mahout.math.Vector;
-
 import java.io.IOException;
 import java.io.Writer;
 import java.util.HashMap;
@@ -32,6 +25,14 @@ import java.util.Map;
 import java.util.Random;
 import java.util.regex.Pattern;
 
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+import org.apache.mahout.common.StringUtils;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.NamedVector;
+import org.apache.mahout.math.Vector;
+
 /**
  * GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
  */
@@ -93,9 +94,10 @@ public class GraphMLClusterWriter extend
    */
 
   @Override
-  public void write(Cluster cluster) throws IOException {
+  public void write(ClusterWritable clusterWritable) throws IOException {
     StringBuilder line = new StringBuilder();
-    Color rgb = getColor(cluster.getId());
+    Cluster cluster = clusterWritable.getValue();
+	Color rgb = getColor(cluster.getId());
 
     String topTerms = "";
     if (dictionary != null) {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Fri Mar 23 17:09:35 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
 
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -72,6 +71,7 @@ import org.apache.mahout.vectorizer.Weig
 import org.junit.Before;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
 import com.google.common.io.Closeables;
 
 public final class TestClusterDumper extends MahoutTestCase {