You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2011/10/20 00:28:23 UTC

svn commit: r1186540 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java examples/bin/build-reuters.sh

Author: jeastman
Date: Wed Oct 19 22:28:22 2011
New Revision: 1186540

URL: http://svn.apache.org/viewvc?rev=1186540&view=rev
Log:
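MAHOUT-846: Simplified DistanceMeasureCluster.pdf() to avoid exp(...): it now returns 1 / (1 + distance) instead of exp(-distance). Increased Dirichlet process clustering (DPC) iterations from 10 to 20 in build-reuters.sh. All tests run.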

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
    mahout/trunk/examples/bin/build-reuters.sh

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java Wed Oct 19 22:28:22 2011
@@ -37,7 +37,7 @@ public class DistanceMeasureCluster exte
 
   public DistanceMeasureCluster() {
   }
-  
+
   @Override
   public void configure(Configuration job) {
     if (getMeasure() != null) {
@@ -69,7 +69,7 @@ public class DistanceMeasureCluster exte
 
   @Override
   public double pdf(VectorWritable vw) {
-    return Math.exp(-measure.distance(vw.get(), getCenter()));
+    return 1 / (1 + measure.distance(vw.get(), getCenter()));
   }
 
   @Override
@@ -82,7 +82,8 @@ public class DistanceMeasureCluster exte
   }
 
   /**
-   * @param measure the measure to set
+   * @param measure
+   *          the measure to set
    */
   public void setMeasure(DistanceMeasure measure) {
     this.measure = measure;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java Wed Oct 19 22:28:22 2011
@@ -127,10 +127,10 @@ public final class TestClusterClassifier
   public void testDMClusterClassification() {
     ClusterClassifier classifier = newDMClassifier();
     Vector pdf = classifier.classify(new DenseVector(2));
-    assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+    assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
         AbstractCluster.formatVector(pdf, null));
     pdf = classifier.classify(new DenseVector(2).assign(2));
-    assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+    assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
         AbstractCluster.formatVector(pdf, null));
   }
   
@@ -143,10 +143,10 @@ public final class TestClusterClassifier
     models.add(new Canopy(new DenseVector(2).assign(-1), 2, measure));
     ClusterClassifier classifier = new ClusterClassifier(models);
     Vector pdf = classifier.classify(new DenseVector(2));
-    assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+    assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
         AbstractCluster.formatVector(pdf, null));
     pdf = classifier.classify(new DenseVector(2).assign(2));
-    assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+    assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
         AbstractCluster.formatVector(pdf, null));
   }
   
@@ -154,10 +154,10 @@ public final class TestClusterClassifier
   public void testClusterClassification() {
     ClusterClassifier classifier = newClusterClassifier();
     Vector pdf = classifier.classify(new DenseVector(2));
-    assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+    assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
         AbstractCluster.formatVector(pdf, null));
     pdf = classifier.classify(new DenseVector(2).assign(2));
-    assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+    assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
         AbstractCluster.formatVector(pdf, null));
   }
   

Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Wed Oct 19 22:28:22 2011
@@ -142,7 +142,7 @@ elif [ "x$clustertype" == "xdirichlet" ]
   && \
   $MAHOUT dirichlet \
     -i ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/tfidf-vectors \
-    -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 10 -a0 2 \
+    -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 20 -a0 2 \
     -md org.apache.mahout.clustering.dirichlet.models.DistanceMeasureClusterDistribution \
     -mp org.apache.mahout.math.DenseVector \
     -dm org.apache.mahout.common.distance.CosineDistanceMeasure \