You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/06/05 06:30:43 UTC
svn commit: r1346237 - /mahout/trunk/examples/bin/cluster-reuters.sh
Author: ssc
Date: Tue Jun 5 04:30:42 2012
New Revision: 1346237
URL: http://svn.apache.org/viewvc?rev=1346237&view=rev
Log:
MAHOUT-1024 cluster-reuters.sh still relies on old (now removed) lda implementation
Modified:
mahout/trunk/examples/bin/cluster-reuters.sh
Modified: mahout/trunk/examples/bin/cluster-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/cluster-reuters.sh?rev=1346237&r1=1346236&r2=1346237&view=diff
==============================================================================
--- mahout/trunk/examples/bin/cluster-reuters.sh (original)
+++ mahout/trunk/examples/bin/cluster-reuters.sh Tue Jun 5 04:30:42 2012
@@ -39,7 +39,7 @@ if [ ! -e $MAHOUT ]; then
exit 1
fi
-algorithm=( kmeans fuzzykmeans lda dirichlet minhash)
+algorithm=( kmeans fuzzykmeans dirichlet minhash)
if [ -n "$1" ]; then
choice=$1
else
@@ -48,7 +48,6 @@ else
echo "2. ${algorithm[1]} clustering"
echo "3. ${algorithm[2]} clustering"
echo "4. ${algorithm[3]} clustering"
- echo "5. ${algorithm[4]} clustering"
read -p "Enter your choice : " choice
fi
@@ -107,10 +106,13 @@ if [ "x$clustertype" == "xkmeans" ]; the
-x 10 -k 20 -ow --clustering \
&& \
$MAHOUT clusterdump \
- -s ${WORK_DIR}/reuters-kmeans/clusters-*-final \
+ -i ${WORK_DIR}/reuters-kmeans/clusters-*-final \
+ -o ${WORK_DIR}/reuters-kmeans/clusterdump \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-kmeans/dictionary.file-0 \
- -dt sequencefile -b 100 -n 20 --evaluate -dm org.apache.mahout.common.distance.CosineDistanceMeasure \
- --pointsDir ${WORK_DIR}/reuters-kmeans/clusteredPoints
+ -dt sequencefile -b 100 -n 20 --evaluate -dm org.apache.mahout.common.distance.CosineDistanceMeasure -sp 0 \
+ --pointsDir ${WORK_DIR}/reuters-kmeans/clusteredPoints \
+ && \
+ cat ${WORK_DIR}/reuters-kmeans/clusterdump
elif [ "x$clustertype" == "xfuzzykmeans" ]; then
$MAHOUT seq2sparse \
-i ${WORK_DIR}/reuters-out-seqdir/ \
@@ -127,20 +129,13 @@ elif [ "x$clustertype" == "xfuzzykmeans"
-s ${WORK_DIR}/reuters-fkmeans/clusters-*-final \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-fkmeans/dictionary.file-0 \
-dt sequencefile -b 100 -n 20
-elif [ "x$clustertype" == "xlda" ]; then
- $MAHOUT seq2sparse \
- -i ${WORK_DIR}/reuters-out-seqdir/ \
- -o ${WORK_DIR}/reuters-out-seqdir-sparse-lda \
- -wt tf -seq -nr 3 --namedVector \
- && \
- $MAHOUT lda \
- -i ${WORK_DIR}/reuters-out-seqdir-sparse-lda/tf-vectors \
- -o ${WORK_DIR}/reuters-lda -k 20 -ow -x 20 \
- && \
- $MAHOUT ldatopics \
- -i ${WORK_DIR}/reuters-lda/state-20 \
- -d ${WORK_DIR}/reuters-out-seqdir-sparse-lda/dictionary.file-0 \
- -dt sequencefile
+ $MAHOUT clusterdump \
+ -i ${WORK_DIR}/reuters-fkmeans/clusters-*-final \
+ -o ${WORK_DIR}/reuters-fkmeans/clusterdump \
+ -d ${WORK_DIR}/reuters-out-seqdir-sparse-fkmeans/dictionary.file-0 \
+ -dt sequencefile -b 100 -n 20 -sp 0 \
+ && \
+ cat ${WORK_DIR}/reuters-fkmeans/clusterdump
elif [ "x$clustertype" == "xdirichlet" ]; then
$MAHOUT seq2sparse \
-i ${WORK_DIR}/reuters-out-seqdir/ \
@@ -157,6 +152,13 @@ elif [ "x$clustertype" == "xdirichlet" ]
-s ${WORK_DIR}/reuters-dirichlet/clusters-*-final \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/dictionary.file-0 \
-dt sequencefile -b 100 -n 20
+ $MAHOUT clusterdump \
+ -i ${WORK_DIR}/reuters-dirichlet/clusters-*-final \
+ -o ${WORK_DIR}/reuters-dirichlet/clusterdump \
+ -d ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/dictionary.file-0 \
+ -dt sequencefile -b 100 -n 20 -sp 0 \
+ && \
+ cat ${WORK_DIR}/reuters-dirichlet/clusterdump
elif [ "x$clustertype" == "xminhash" ]; then
$MAHOUT seq2sparse \
-i ${WORK_DIR}/reuters-out-seqdir/ \