You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2011/10/19 22:23:53 UTC
svn commit: r1186452 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
examples/bin/build-reuters.sh
Author: jeastman
Date: Wed Oct 19 20:23:50 2011
New Revision: 1186452
URL: http://svn.apache.org/viewvc?rev=1186452&view=rev
Log:
MAHOUT-846: Fixed the prior of DistanceMeasureClusterDistribution (DMCD) so that its model vectors have Gaussian-distributed element values instead of all zeros. Updated build-reuters.sh to use DMCD. All tests run and the Reuters results look much better.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
mahout/trunk/examples/bin/build-reuters.sh
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java?rev=1186452&r1=1186451&r2=1186452&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistanceMeasureClusterDistribution.java Wed Oct 19 20:23:50 2011
@@ -19,14 +19,16 @@ package org.apache.mahout.clustering.dir
import org.apache.mahout.clustering.DistanceMeasureCluster;
import org.apache.mahout.clustering.Model;
+import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
/**
- * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
- * Models use a DistanceMeasure to calculate pdf values.
+ * An implementation of the ModelDistribution interface suitable for testing the
+ * DirichletCluster algorithm. Models use a DistanceMeasure to calculate pdf
+ * values.
*/
public class DistanceMeasureClusterDistribution extends AbstractVectorModelDistribution {
@@ -48,9 +50,12 @@ public class DistanceMeasureClusterDistr
@Override
public Model<VectorWritable>[] sampleFromPrior(int howMany) {
Model<VectorWritable>[] result = new DistanceMeasureCluster[howMany];
+ Vector prototype = getModelPrototype().get().like();
+ for (int i = 0; i < prototype.size(); i++) {
+ prototype.setQuick(i, UncommonDistributions.rNorm(0, 1));
+ }
for (int i = 0; i < howMany; i++) {
- Vector prototype = getModelPrototype().get();
- result[i] = new DistanceMeasureCluster(prototype.like(), i, measure);
+ result[i] = new DistanceMeasureCluster(prototype, i, measure);
}
return result;
}
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1186452&r1=1186451&r2=1186452&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Wed Oct 19 20:23:50 2011
@@ -142,12 +142,15 @@ elif [ "x$clustertype" == "xdirichlet" ]
&& \
$MAHOUT dirichlet \
-i ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/tfidf-vectors \
- -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 20 \
+ -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 10 -a0 2 \
+ -md org.apache.mahout.clustering.dirichlet.models.DistanceMeasureClusterDistribution \
+ -mp org.apache.mahout.math.DenseVector \
+ -dm org.apache.mahout.common.distance.CosineDistanceMeasure \
&& \
$MAHOUT clusterdump \
-s ${WORK_DIR}/reuters-dirichlet/clusters-*-final \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/dictionary.file-0 \
-dt sequencefile -b 100 -n 20
else
- echo "unknown cluster type: $clustertype";
+ echo "unknown cluster type: $clustertype"
fi