You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pr...@apache.org on 2012/03/23 18:09:36 UTC
svn commit: r1304490 [2/2] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/canopy/
core/src/main/java/org/apache/mahout/clustering/classify/
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/main/java/org/apache/ma...
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,16 +17,6 @@
package org.apache.mahout.utils.clustering;
-import com.google.common.collect.Lists;
-import org.apache.commons.lang.StringUtils;
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.math.Vector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.IOException;
import java.io.Writer;
import java.util.Collection;
@@ -37,6 +27,17 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import org.apache.commons.lang.StringUtils;
+import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+
/**
* Base class for implementing ClusterWriter
*/
@@ -151,7 +152,7 @@ public abstract class AbstractClusterWri
}
@Override
- public long write(Iterable<Cluster> iterable) throws IOException {
+ public long write(Iterable<ClusterWritable> iterable) throws IOException {
return write(iterable, Long.MAX_VALUE);
}
@@ -161,9 +162,9 @@ public abstract class AbstractClusterWri
}
@Override
- public long write(Iterable<Cluster> iterable, long maxDocs) throws IOException {
+ public long write(Iterable<ClusterWritable> iterable, long maxDocs) throws IOException {
long result = 0;
- Iterator<Cluster> iterator = iterable.iterator();
+ Iterator<ClusterWritable> iterator = iterable.iterator();
while (result < maxDocs && iterator.hasNext()){
write(iterator.next());
result++;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -19,6 +19,7 @@ package org.apache.mahout.utils.clusteri
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
@@ -43,9 +44,10 @@ public class CSVClusterWriter extends Ab
}
@Override
- public void write(Cluster cluster) throws IOException {
+ public void write(ClusterWritable clusterWritable) throws IOException {
StringBuilder line = new StringBuilder();
- line.append(cluster.getId());
+ Cluster cluster = clusterWritable.getValue();
+ line.append(cluster.getId());
List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
if (points != null) {
for (WeightedVectorWritable point : points) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Fri Mar 23 17:09:35 2012
@@ -17,19 +17,24 @@
package org.apache.mahout.utils.clustering;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-import com.google.common.io.Files;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.cdbw.CDbwEvaluator;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
@@ -44,14 +49,10 @@ import org.apache.mahout.utils.vectors.V
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
+import com.google.common.io.Files;
public final class ClusterDumper extends AbstractJob {
@@ -183,7 +184,7 @@ public final class ClusterDumper extends
}
ClusterWriter clusterWriter = createClusterWriter(writer, dictionary);
try {
- long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<Cluster>(new Path(seqFileDir, "part-*"), PathType.GLOB, conf));
+ long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>(new Path(seqFileDir, "part-*"), PathType.GLOB, conf));
writer.flush();
if (runEvaluation){
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumperWriter.java Fri Mar 23 17:09:35 2012
@@ -22,6 +22,7 @@ import org.apache.mahout.clustering.Abst
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.distance.DistanceMeasure;
import java.io.IOException;
@@ -52,8 +53,9 @@ public class ClusterDumperWriter extends
}
@Override
- public void write(Cluster value) throws IOException {
- String fmtStr = value.asFormatString(dictionary);
+ public void write(ClusterWritable clusterWritable) throws IOException {
+ Cluster cluster = clusterWritable.getValue();
+ String fmtStr = cluster.asFormatString(dictionary);
Writer writer = getWriter();
if (subString > 0 && fmtStr.length() > subString) {
writer.write(':');
@@ -65,14 +67,14 @@ public class ClusterDumperWriter extends
writer.write('\n');
if (dictionary != null) {
- String topTerms = getTopFeatures(value.getCenter(), dictionary, numTopFeatures);
+ String topTerms = getTopFeatures(clusterWritable.getValue().getCenter(), dictionary, numTopFeatures);
writer.write("\tTop Terms: ");
writer.write(topTerms);
writer.write('\n');
}
Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints = getClusterIdToPoints();
- List<WeightedVectorWritable> points = clusterIdToPoints.get(value.getId());
+ List<WeightedVectorWritable> points = clusterIdToPoints.get(clusterWritable.getValue().getId());
if (points != null) {
writer.write("\tWeight : [props - optional]: Point:\n\t");
for (Iterator<WeightedVectorWritable> iterator = points.iterator(); iterator.hasNext(); ) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,11 +17,11 @@
package org.apache.mahout.utils.clustering;
-import org.apache.mahout.clustering.Cluster;
-
import java.io.Closeable;
import java.io.IOException;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+
/**
* Writes out clusters
*/
@@ -33,7 +33,7 @@ public interface ClusterWriter extends C
* @return the number of docs written
* @throws java.io.IOException if there was a problem writing
*/
- long write(Iterable<Cluster> iterable) throws IOException;
+ long write(Iterable<ClusterWritable> iterable) throws IOException;
/**
* Write out a Cluster
@@ -41,7 +41,7 @@ public interface ClusterWriter extends C
* @param cluster The {@link org.apache.mahout.clustering.Cluster} to write
* @throws IOException
*/
- void write(Cluster cluster) throws IOException;
+ void write(ClusterWritable clusterWritable) throws IOException;
/**
* Write the first {@code maxDocs} to the output.
@@ -51,5 +51,5 @@ public interface ClusterWriter extends C
* @return The number of docs written
* @throws IOException if there was a problem writing
*/
- long write(Iterable<Cluster> iterable, long maxDocs) throws IOException;
+ long write(Iterable<ClusterWritable> iterable, long maxDocs) throws IOException;
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java Fri Mar 23 17:09:35 2012
@@ -17,13 +17,6 @@
package org.apache.mahout.utils.clustering;
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.classify.WeightedVectorWritable;
-import org.apache.mahout.common.StringUtils;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.math.NamedVector;
-import org.apache.mahout.math.Vector;
-
import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
@@ -32,6 +25,14 @@ import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.classify.WeightedVectorWritable;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+import org.apache.mahout.common.StringUtils;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.NamedVector;
+import org.apache.mahout.math.Vector;
+
/**
* GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
*/
@@ -93,9 +94,10 @@ public class GraphMLClusterWriter extend
*/
@Override
- public void write(Cluster cluster) throws IOException {
+ public void write(ClusterWritable clusterWritable) throws IOException {
StringBuilder line = new StringBuilder();
- Color rgb = getColor(cluster.getId());
+ Cluster cluster = clusterWritable.getValue();
+ Color rgb = getColor(cluster.getId());
String topTerms = "";
if (dictionary != null) {
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1304490&r1=1304489&r2=1304490&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Fri Mar 23 17:09:35 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -72,6 +71,7 @@ import org.apache.mahout.vectorizer.Weig
import org.junit.Before;
import org.junit.Test;
+import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
public final class TestClusterDumper extends MahoutTestCase {