You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/08 02:58:11 UTC

svn commit: r961548 - in /mahout/trunk/core/src: main/java/org/apache/mahout/clustering/dirichlet/ main/java/org/apache/mahout/clustering/dirichlet/models/ main/java/org/apache/mahout/clustering/meanshift/ test/java/org/apache/mahout/clustering/dirichlet/ test/java/org/apache/mahout/clustering/meanshift/ test/java/org/apache/mahout/common/

Author: jeastman
Date: Thu Jul  8 00:58:10 2010
New Revision: 961548

URL: http://svn.apache.org/viewvc?rev=961548&view=rev
Log:
MAHOUT-167: Minor code cleanup to make PMD happier. Also fixed a minor bug in DummyStatusReporter. All tests run.

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java
      - copied, changed from r961473, mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java
Removed:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java Thu Jul  8 00:58:10 2010
@@ -77,6 +77,7 @@ public class DirichletCluster<O> impleme
   }
 
   /** Reads a typed Model instance from the input stream */
+  @SuppressWarnings("unchecked")
   public static <O> Model<O> readModel(DataInput in) throws IOException {
     String modelClassName = in.readUTF();
     Model<O> model;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu Jul  8 00:58:10 2010
@@ -42,7 +42,7 @@ import org.apache.hadoop.mapreduce.lib.o
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.WeightedVectorWritable;
-import org.apache.mahout.clustering.dirichlet.models.VectorModelDistribution;
+import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution;
 import org.apache.mahout.clustering.kmeans.OutputLogFilter;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
@@ -261,8 +261,8 @@ public final class DirichletDriver {
              SecurityException, NoSuchMethodException, IllegalArgumentException, InvocationTargetException {
 
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    Class<? extends VectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(VectorModelDistribution.class);
-    VectorModelDistribution factory = cl.newInstance();
+    Class<? extends AbstractVectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(AbstractVectorModelDistribution.class);
+    AbstractVectorModelDistribution factory = cl.newInstance();
 
     Class<? extends Vector> vcl = ccl.loadClass(modelPrototype).asSubclass(Vector.class);
     Constructor<? extends Vector> v = vcl.getConstructor(int.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Thu Jul  8 00:58:10 2010
@@ -49,7 +49,7 @@ public class DirichletState<O> {
       clusters.add(new DirichletCluster<O>(m));
     }
     // sample the mixture parameters from a Dirichlet distribution on the totalCounts
-    mixture = UncommonDistributions.rDirichlet(totalCounts(), alpha0);
+    mixture = UncommonDistributions.rDirichlet(computeTotalCounts(), alpha0);
   }
   
   public DirichletState() { }
@@ -87,6 +87,10 @@ public class DirichletState<O> {
   }
   
   public Vector totalCounts() {
+    return computeTotalCounts();
+  }
+
+  private Vector computeTotalCounts() {
     Vector result = new DenseVector(numClusters);
     for (int i = 0; i < numClusters; i++) {
       result.set(i, clusters.get(i).getTotalCount());
@@ -125,6 +129,7 @@ public class DirichletState<O> {
     return mix * pdf;
   }
   
+  @SuppressWarnings("unchecked")
   public Model<O>[] getModels() {
     Model<O>[] result = new Model[numClusters];
     for (int i = 0; i < numClusters; i++) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java Thu Jul  8 00:58:10 2010
@@ -51,6 +51,7 @@ public class JsonClusterAdapter implemen
     return obj;
   }
   
+  @SuppressWarnings("unchecked")
   @Override
   public DirichletCluster<?> deserialize(JsonElement json,
                                          Type typeOfT,

Copied: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java (from r961473, mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java?p2=mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java&p1=mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java&r1=961473&r2=961548&rev=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AbstractVectorModelDistribution.java Thu Jul  8 00:58:10 2010
@@ -18,16 +18,16 @@ package org.apache.mahout.clustering.dir
 
 import org.apache.mahout.math.VectorWritable;
 
-public abstract class VectorModelDistribution implements ModelDistribution<VectorWritable> {
+public abstract class AbstractVectorModelDistribution implements ModelDistribution<VectorWritable> {
 
   // a prototype instance used for creating prior model distributions using like(). It
   // should be of the class and cardinality desired for the particular application.
   private VectorWritable modelPrototype;
 
-  protected VectorModelDistribution() {
+  protected AbstractVectorModelDistribution() {
   }
   
-  protected VectorModelDistribution(VectorWritable modelPrototype) {
+  protected AbstractVectorModelDistribution(VectorWritable modelPrototype) {
     this.modelPrototype = modelPrototype;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Thu Jul  8 00:58:10 2010
@@ -26,7 +26,7 @@ import org.apache.mahout.math.VectorWrit
  * Uses a Normal Distribution to sample the prior model values. Model values have a vector standard deviation,
  * allowing assymetrical regions to be covered by a model.
  */
-public class AsymmetricSampledNormalDistribution extends VectorModelDistribution {
+public class AsymmetricSampledNormalDistribution extends AbstractVectorModelDistribution {
   
   public AsymmetricSampledNormalDistribution() {
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java Thu Jul  8 00:58:10 2010
@@ -41,7 +41,7 @@ public class L1Model implements Model<Ve
 
   private Vector coefficients;
 
-  private int count;
+  private int counter;
 
   private Vector observed;
 
@@ -56,17 +56,17 @@ public class L1Model implements Model<Ve
 
   @Override
   public void computeParameters() {
-    coefficients = observed.divide(count);
+    coefficients = observed.divide(counter);
   }
 
   @Override
   public int count() {
-    return count;
+    return counter;
   }
 
   @Override
   public void observe(VectorWritable x) {
-    count++;
+    counter++;
     x.get().addTo(observed);
   }
 
@@ -78,7 +78,7 @@ public class L1Model implements Model<Ve
   @Override
   public void readFields(DataInput in) throws IOException {
     this.id = in.readInt();
-    this.count = in.readInt();
+    this.counter = in.readInt();
     VectorWritable temp = new VectorWritable();
     temp.readFields(in);
     this.coefficients = temp.get();
@@ -88,7 +88,7 @@ public class L1Model implements Model<Ve
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(id);
-    out.writeInt(count);
+    out.writeInt(counter);
     VectorWritable.writeVector(out, coefficients);
   }
 
@@ -104,7 +104,7 @@ public class L1Model implements Model<Ve
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder();
-    buf.append("l1m{n=").append(count).append(" c=");
+    buf.append("l1m{n=").append(counter).append(" c=");
     if (coefficients != null) {
       buf.append(ClusterBase.formatVector(coefficients, bindings));
     }
@@ -132,7 +132,7 @@ public class L1Model implements Model<Ve
 
   @Override
   public int getNumPoints() {
-    return count;
+    return counter;
   }
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java Thu Jul  8 00:58:10 2010
@@ -24,7 +24,7 @@ import org.apache.mahout.math.VectorWrit
  * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
  * Uses a L1Distribution
  */
-public class L1ModelDistribution extends VectorModelDistribution {
+public class L1ModelDistribution extends AbstractVectorModelDistribution {
   
   public L1ModelDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Thu Jul  8 00:58:10 2010
@@ -24,7 +24,7 @@ import org.apache.mahout.math.VectorWrit
  * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
  * Uses a Normal Distribution
  */
-public class NormalModelDistribution extends VectorModelDistribution {
+public class NormalModelDistribution extends AbstractVectorModelDistribution {
   
   public NormalModelDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Thu Jul  8 00:58:10 2010
@@ -41,28 +41,6 @@ public class MeanShiftCanopyClusterer {
   private DistanceMeasure measure;
 
   public MeanShiftCanopyClusterer(Configuration configuration) {
-    configure(configuration);
-  }
-
-  public MeanShiftCanopyClusterer(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
-    config(aMeasure, aT1, aT2, aDelta);
-  }
-
-  public double getT1() {
-    return t1;
-  }
-
-  public double getT2() {
-    return t2;
-  }
-
-  /**
-   * Configure the Canopy and its distance measure
-   * 
-   * @param configuration
-   *          the JobConf for this job
-   */
-  public void configure(Configuration configuration) {
     try {
       measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY))
           .asSubclass(DistanceMeasure.class).newInstance();
@@ -80,13 +58,7 @@ public class MeanShiftCanopyClusterer {
     convergenceDelta = Double.parseDouble(configuration.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY));
   }
 
-  /**
-   * Configure the Canopy for unit tests
-   * 
-   * @param aDelta
-   *          the convergence criteria
-   */
-  public void config(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
+  public MeanShiftCanopyClusterer(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
     // nextCanopyId = 100; // so canopyIds will sort properly // never read?
     measure = aMeasure;
     t1 = aT1;
@@ -94,6 +66,14 @@ public class MeanShiftCanopyClusterer {
     convergenceDelta = aDelta;
   }
 
+  public double getT1() {
+    return t1;
+  }
+
+  public double getT2() {
+    return t2;
+  }
+
   /**
    * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the
    * center of each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Thu Jul  8 00:58:10 2010
@@ -49,7 +49,7 @@ public class MeanShiftCanopyReducer exte
     for (MeanShiftCanopy canopy : canopies) {
       boolean converged = clusterer.shiftToMean(canopy);
       if (converged) {
-      // TODO:  reporter.incrCounter("Clustering", "Converged Clusters", 1);
+      context.getCounter("Clustering", "Converged Clusters").increment(1);
       }
       allConverged = converged && allConverged;
       context.write(new Text(canopy.getIdentifier()), canopy);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java Thu Jul  8 00:58:10 2010
@@ -212,7 +212,7 @@ public class TestDirichletClustering ext
     assertNotNull(result);
   }
 
-  public void testDirichletCluster100_3() {
+  public void testDirichletCluster100C3() {
     System.out.println("testDirichletCluster100");
     generateSamples(40, 1, 1, 3, 3);
     generateSamples(30, 1, 0, 0.1, 3);
@@ -226,7 +226,7 @@ public class TestDirichletClustering ext
     assertNotNull(result);
   }
 
-  public void testDirichletCluster100s_3() {
+  public void testDirichletCluster100sC3() {
     System.out.println("testDirichletCluster100s");
     generateSamples(40, 1, 1, 3, 3);
     generateSamples(30, 1, 0, 0.1, 3);
@@ -240,7 +240,7 @@ public class TestDirichletClustering ext
     assertNotNull(result);
   }
 
-  public void testDirichletCluster100as_3() {
+  public void testDirichletCluster100asC3() {
     System.out.println("testDirichletCluster100as");
     generateSamples(40, 1, 1, 3, 3);
     generateSamples(30, 1, 0, 0.1, 3);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Thu Jul  8 00:58:10 2010
@@ -43,8 +43,6 @@ public class TestMeanShift extends Mahou
 
   private Vector[] raw = null;
 
-  private Configuration conf;
-
   // DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
 
   private final DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
@@ -93,7 +91,6 @@ public class TestMeanShift extends Mahou
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    conf = new Configuration();
     raw = new Vector[100];
     for (int i = 0; i < 10; i++) {
       for (int j = 0; j < 10; j++) {
@@ -137,6 +134,7 @@ public class TestMeanShift extends Mahou
       printImage(canopies);
       System.out.println(iter++);
     }
+    assertTrue(true);
   }
 
   /**
@@ -290,6 +288,7 @@ public class TestMeanShift extends Mahou
    */
   public void testCanopyEuclideanMRJob() throws Exception {
     Path input = getTestTempDirPath("testdata");
+    Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
     List<VectorWritable> points = new ArrayList<VectorWritable>();
     for (Vector v : raw) {
@@ -300,7 +299,6 @@ public class TestMeanShift extends Mahou
     // now run the Job
     Path output = getTestTempDirPath("output");
     MeanShiftCanopyDriver.runJob(input, output, EuclideanDistanceMeasure.class.getName(), 4, 1, 0.5, 10, false, false);
-    Configuration conf = new Configuration();
     Path outPart = new Path(output, "clusters-3/part-r-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPart, conf);
     Text key = new Text();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=961548&r1=961547&r2=961548&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Thu Jul  8 00:58:10 2010
@@ -23,7 +23,7 @@ public class DummyStatusReporter extends
   public Counter getCounter(String group, String name) {
     if (!counterGroups.containsKey(group + name))
       counterGroups.put(group + name, new DummyCounter());
-    return counterGroups.get(name);
+    return counterGroups.get(group+name);
   }
 
   @Override