You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/08 02:58:11 UTC
svn commit: r961548 - in /mahout/trunk/core/src:
main/java/org/apache/mahout/clustering/dirichlet/
main/java/org/apache/mahout/clustering/dirichlet/models/
main/java/org/apache/mahout/clustering/meanshift/
test/java/org/apache/mahout/clustering/dirichl...
Author: jeastman
Date: Thu Jul 8 00:58:10 2010
New Revision: 961548
URL: http://svn.apache.org/viewvc?rev=961548&view=rev
Log:
MAHOUT-167: Minor code cleanup to make PMD happier. Also fixed minor bug in DummyStatusReporter. All tests run
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java
- copied, changed from r961473, mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java
Removed:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java Thu Jul 8 00:58:10 2010
@@ -77,6 +77,7 @@ public class DirichletCluster<O> impleme
}
/** Reads a typed Model instance from the input stream */
+ @SuppressWarnings("unchecked")
public static <O> Model<O> readModel(DataInput in) throws IOException {
String modelClassName = in.readUTF();
Model<O> model;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu Jul 8 00:58:10 2010
@@ -42,7 +42,7 @@ import org.apache.hadoop.mapreduce.lib.o
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
-import org.apache.mahout.clustering.dirichlet.models.VectorModelDistribution;
+import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution;
import org.apache.mahout.clustering.kmeans.OutputLogFilter;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
@@ -261,8 +261,8 @@ public final class DirichletDriver {
SecurityException, NoSuchMethodException, IllegalArgumentException, InvocationTargetException {
ClassLoader ccl = Thread.currentThread().getContextClassLoader();
- Class<? extends VectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(VectorModelDistribution.class);
- VectorModelDistribution factory = cl.newInstance();
+ Class<? extends AbstractVectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(AbstractVectorModelDistribution.class);
+ AbstractVectorModelDistribution factory = cl.newInstance();
Class<? extends Vector> vcl = ccl.loadClass(modelPrototype).asSubclass(Vector.class);
Constructor<? extends Vector> v = vcl.getConstructor(int.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Thu Jul 8 00:58:10 2010
@@ -49,7 +49,7 @@ public class DirichletState<O> {
clusters.add(new DirichletCluster<O>(m));
}
// sample the mixture parameters from a Dirichlet distribution on the totalCounts
- mixture = UncommonDistributions.rDirichlet(totalCounts(), alpha0);
+ mixture = UncommonDistributions.rDirichlet(computeTotalCounts(), alpha0);
}
public DirichletState() { }
@@ -87,6 +87,10 @@ public class DirichletState<O> {
}
public Vector totalCounts() {
+ return computeTotalCounts();
+ }
+
+ private Vector computeTotalCounts() {
Vector result = new DenseVector(numClusters);
for (int i = 0; i < numClusters; i++) {
result.set(i, clusters.get(i).getTotalCount());
@@ -125,6 +129,7 @@ public class DirichletState<O> {
return mix * pdf;
}
+ @SuppressWarnings("unchecked")
public Model<O>[] getModels() {
Model<O>[] result = new Model[numClusters];
for (int i = 0; i < numClusters; i++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java Thu Jul 8 00:58:10 2010
@@ -51,6 +51,7 @@ public class JsonClusterAdapter implemen
return obj;
}
+ @SuppressWarnings("unchecked")
@Override
public DirichletCluster<?> deserialize(JsonElement json,
Type typeOfT,
Copied: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java (from r961473, mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java?p2=mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java&p1=mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java&r1=961473&r2=961548&rev=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java Thu Jul 8 00:58:10 2010
@@ -18,16 +18,16 @@ package org.apache.mahout.clustering.dir
import org.apache.mahout.math.VectorWritable;
-public abstract class VectorModelDistribution implements ModelDistribution<VectorWritable> {
+public abstract class AbstractVectorModelDistribution implements ModelDistribution<VectorWritable> {
// a prototype instance used for creating prior model distributions using like(). It
// should be of the class and cardinality desired for the particular application.
private VectorWritable modelPrototype;
- protected VectorModelDistribution() {
+ protected AbstractVectorModelDistribution() {
}
- protected VectorModelDistribution(VectorWritable modelPrototype) {
+ protected AbstractVectorModelDistribution(VectorWritable modelPrototype) {
this.modelPrototype = modelPrototype;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Thu Jul 8 00:58:10 2010
@@ -26,7 +26,7 @@ import org.apache.mahout.math.VectorWrit
* Uses a Normal Distribution to sample the prior model values. Model values have a vector standard deviation,
* allowing assymetrical regions to be covered by a model.
*/
-public class AsymmetricSampledNormalDistribution extends VectorModelDistribution {
+public class AsymmetricSampledNormalDistribution extends AbstractVectorModelDistribution {
public AsymmetricSampledNormalDistribution() {
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java Thu Jul 8 00:58:10 2010
@@ -41,7 +41,7 @@ public class L1Model implements Model<Ve
private Vector coefficients;
- private int count;
+ private int counter;
private Vector observed;
@@ -56,17 +56,17 @@ public class L1Model implements Model<Ve
@Override
public void computeParameters() {
- coefficients = observed.divide(count);
+ coefficients = observed.divide(counter);
}
@Override
public int count() {
- return count;
+ return counter;
}
@Override
public void observe(VectorWritable x) {
- count++;
+ counter++;
x.get().addTo(observed);
}
@@ -78,7 +78,7 @@ public class L1Model implements Model<Ve
@Override
public void readFields(DataInput in) throws IOException {
this.id = in.readInt();
- this.count = in.readInt();
+ this.counter = in.readInt();
VectorWritable temp = new VectorWritable();
temp.readFields(in);
this.coefficients = temp.get();
@@ -88,7 +88,7 @@ public class L1Model implements Model<Ve
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(id);
- out.writeInt(count);
+ out.writeInt(counter);
VectorWritable.writeVector(out, coefficients);
}
@@ -104,7 +104,7 @@ public class L1Model implements Model<Ve
@Override
public String asFormatString(String[] bindings) {
StringBuilder buf = new StringBuilder();
- buf.append("l1m{n=").append(count).append(" c=");
+ buf.append("l1m{n=").append(counter).append(" c=");
if (coefficients != null) {
buf.append(ClusterBase.formatVector(coefficients, bindings));
}
@@ -132,7 +132,7 @@ public class L1Model implements Model<Ve
@Override
public int getNumPoints() {
- return count;
+ return counter;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java Thu Jul 8 00:58:10 2010
@@ -24,7 +24,7 @@ import org.apache.mahout.math.VectorWrit
* An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
* Uses a L1Distribution
*/
-public class L1ModelDistribution extends VectorModelDistribution {
+public class L1ModelDistribution extends AbstractVectorModelDistribution {
public L1ModelDistribution(VectorWritable modelPrototype) {
super(modelPrototype);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Thu Jul 8 00:58:10 2010
@@ -24,7 +24,7 @@ import org.apache.mahout.math.VectorWrit
* An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
* Uses a Normal Distribution
*/
-public class NormalModelDistribution extends VectorModelDistribution {
+public class NormalModelDistribution extends AbstractVectorModelDistribution {
public NormalModelDistribution(VectorWritable modelPrototype) {
super(modelPrototype);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Thu Jul 8 00:58:10 2010
@@ -41,28 +41,6 @@ public class MeanShiftCanopyClusterer {
private DistanceMeasure measure;
public MeanShiftCanopyClusterer(Configuration configuration) {
- configure(configuration);
- }
-
- public MeanShiftCanopyClusterer(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
- config(aMeasure, aT1, aT2, aDelta);
- }
-
- public double getT1() {
- return t1;
- }
-
- public double getT2() {
- return t2;
- }
-
- /**
- * Configure the Canopy and its distance measure
- *
- * @param configuration
- * the JobConf for this job
- */
- public void configure(Configuration configuration) {
try {
measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY))
.asSubclass(DistanceMeasure.class).newInstance();
@@ -80,13 +58,7 @@ public class MeanShiftCanopyClusterer {
convergenceDelta = Double.parseDouble(configuration.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY));
}
- /**
- * Configure the Canopy for unit tests
- *
- * @param aDelta
- * the convergence criteria
- */
- public void config(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
+ public MeanShiftCanopyClusterer(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
// nextCanopyId = 100; // so canopyIds will sort properly // never read?
measure = aMeasure;
t1 = aT1;
@@ -94,6 +66,14 @@ public class MeanShiftCanopyClusterer {
convergenceDelta = aDelta;
}
+ public double getT1() {
+ return t1;
+ }
+
+ public double getT2() {
+ return t2;
+ }
+
/**
* Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the
* center of each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Thu Jul 8 00:58:10 2010
@@ -49,7 +49,7 @@ public class MeanShiftCanopyReducer exte
for (MeanShiftCanopy canopy : canopies) {
boolean converged = clusterer.shiftToMean(canopy);
if (converged) {
- // TODO: reporter.incrCounter("Clustering", "Converged Clusters", 1);
+ context.getCounter("Clustering", "Converged Clusters").increment(1);
}
allConverged = converged && allConverged;
context.write(new Text(canopy.getIdentifier()), canopy);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java Thu Jul 8 00:58:10 2010
@@ -212,7 +212,7 @@ public class TestDirichletClustering ext
assertNotNull(result);
}
- public void testDirichletCluster100_3() {
+ public void testDirichletCluster100C3() {
System.out.println("testDirichletCluster100");
generateSamples(40, 1, 1, 3, 3);
generateSamples(30, 1, 0, 0.1, 3);
@@ -226,7 +226,7 @@ public class TestDirichletClustering ext
assertNotNull(result);
}
- public void testDirichletCluster100s_3() {
+ public void testDirichletCluster100sC3() {
System.out.println("testDirichletCluster100s");
generateSamples(40, 1, 1, 3, 3);
generateSamples(30, 1, 0, 0.1, 3);
@@ -240,7 +240,7 @@ public class TestDirichletClustering ext
assertNotNull(result);
}
- public void testDirichletCluster100as_3() {
+ public void testDirichletCluster100asC3() {
System.out.println("testDirichletCluster100as");
generateSamples(40, 1, 1, 3, 3);
generateSamples(30, 1, 0, 0.1, 3);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Thu Jul 8 00:58:10 2010
@@ -43,8 +43,6 @@ public class TestMeanShift extends Mahou
private Vector[] raw = null;
- private Configuration conf;
-
// DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
private final DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
@@ -93,7 +91,6 @@ public class TestMeanShift extends Mahou
@Override
protected void setUp() throws Exception {
super.setUp();
- conf = new Configuration();
raw = new Vector[100];
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
@@ -137,6 +134,7 @@ public class TestMeanShift extends Mahou
printImage(canopies);
System.out.println(iter++);
}
+ assertTrue(true);
}
/**
@@ -290,6 +288,7 @@ public class TestMeanShift extends Mahou
*/
public void testCanopyEuclideanMRJob() throws Exception {
Path input = getTestTempDirPath("testdata");
+ Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(input.toUri(), conf);
List<VectorWritable> points = new ArrayList<VectorWritable>();
for (Vector v : raw) {
@@ -300,7 +299,6 @@ public class TestMeanShift extends Mahou
// now run the Job
Path output = getTestTempDirPath("output");
MeanShiftCanopyDriver.runJob(input, output, EuclideanDistanceMeasure.class.getName(), 4, 1, 0.5, 10, false, false);
- Configuration conf = new Configuration();
Path outPart = new Path(output, "clusters-3/part-r-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPart, conf);
Text key = new Text();
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Thu Jul 8 00:58:10 2010
@@ -23,7 +23,7 @@ public class DummyStatusReporter extends
public Counter getCounter(String group, String name) {
if (!counterGroups.containsKey(group + name))
counterGroups.put(group + name, new DummyCounter());
- return counterGroups.get(name);
+ return counterGroups.get(group+name);
}
@Override