You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2011/10/20 00:28:23 UTC
svn commit: r1186540 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
examples/bin/build-reuters.sh
Author: jeastman
Date: Wed Oct 19 22:28:22 2011
New Revision: 1186540
URL: http://svn.apache.org/viewvc?rev=1186540&view=rev
Log:
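MAHOUT-846: Simplified DistanceMeasureCluster.pdf() to return 1 / (1 + distance) instead of exp(-distance), avoiding the exp(...) call. Increased the Dirichlet clustering (DPC) iterations from 10 to 20 in build-reuters. All tests run successfully.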
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
mahout/trunk/examples/bin/build-reuters.sh
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DistanceMeasureCluster.java Wed Oct 19 22:28:22 2011
@@ -37,7 +37,7 @@ public class DistanceMeasureCluster exte
public DistanceMeasureCluster() {
}
-
+
@Override
public void configure(Configuration job) {
if (getMeasure() != null) {
@@ -69,7 +69,7 @@ public class DistanceMeasureCluster exte
@Override
public double pdf(VectorWritable vw) {
- return Math.exp(-measure.distance(vw.get(), getCenter()));
+ return 1 / (1 + measure.distance(vw.get(), getCenter()));
}
@Override
@@ -82,7 +82,8 @@ public class DistanceMeasureCluster exte
}
/**
- * @param measure the measure to set
+ * @param measure
+ * the measure to set
*/
public void setMeasure(DistanceMeasure measure) {
this.measure = measure;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java Wed Oct 19 22:28:22 2011
@@ -127,10 +127,10 @@ public final class TestClusterClassifier
public void testDMClusterClassification() {
ClusterClassifier classifier = newDMClassifier();
Vector pdf = classifier.classify(new DenseVector(2));
- assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+ assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
AbstractCluster.formatVector(pdf, null));
pdf = classifier.classify(new DenseVector(2).assign(2));
- assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+ assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
@@ -143,10 +143,10 @@ public final class TestClusterClassifier
models.add(new Canopy(new DenseVector(2).assign(-1), 2, measure));
ClusterClassifier classifier = new ClusterClassifier(models);
Vector pdf = classifier.classify(new DenseVector(2));
- assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+ assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
AbstractCluster.formatVector(pdf, null));
pdf = classifier.classify(new DenseVector(2).assign(2));
- assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+ assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
@@ -154,10 +154,10 @@ public final class TestClusterClassifier
public void testClusterClassification() {
ClusterClassifier classifier = newClusterClassifier();
Vector pdf = classifier.classify(new DenseVector(2));
- assertEquals("[0,0]", "[0.107, 0.787, 0.107]",
+ assertEquals("[0,0]", "[0.200, 0.600, 0.200]",
AbstractCluster.formatVector(pdf, null));
pdf = classifier.classify(new DenseVector(2).assign(2));
- assertEquals("[2,2]", "[0.867, 0.117, 0.016]",
+ assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1186540&r1=1186539&r2=1186540&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Wed Oct 19 22:28:22 2011
@@ -142,7 +142,7 @@ elif [ "x$clustertype" == "xdirichlet" ]
&& \
$MAHOUT dirichlet \
-i ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/tfidf-vectors \
- -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 10 -a0 2 \
+ -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 20 -a0 2 \
-md org.apache.mahout.clustering.dirichlet.models.DistanceMeasureClusterDistribution \
-mp org.apache.mahout.math.DenseVector \
-dm org.apache.mahout.common.distance.CosineDistanceMeasure \