You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2011/10/19 18:28:52 UTC
svn commit: r1186306 - /mahout/trunk/examples/bin/build-reuters.sh
Author: jeastman
Date: Wed Oct 19 16:28:52 2011
New Revision: 1186306
URL: http://svn.apache.org/viewvc?rev=1186306&view=rev
Log:
Modifying script to include fuzzy k-means (fkmeans) and Dirichlet clustering options
Modified:
mahout/trunk/examples/bin/build-reuters.sh
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1186306&r1=1186305&r2=1186306&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Wed Oct 19 16:28:52 2011
@@ -37,11 +37,13 @@ fi
if [ "$1" = "-ni" ]; then
clustertype=kmeans
else
- algorithm=( kmeans lda )
+ algorithm=( kmeans fuzzykmeans lda dirichlet)
echo "Please select a number to choose the corresponding clustering algorithm"
echo "1. ${algorithm[0]} clustering"
echo "2. ${algorithm[1]} clustering"
+ echo "3. ${algorithm[2]} clustering"
+ echo "4. ${algorithm[3]} clustering"
read -p "Enter your choice : " choice
echo "ok. You chose $choice and we'll use ${algorithm[$choice-1]} Clustering"
@@ -103,6 +105,22 @@ if [ "x$clustertype" == "xkmeans" ]; the
-s ${WORK_DIR}/reuters-kmeans/clusters-*-final \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-kmeans/dictionary.file-0 \
-dt sequencefile -b 100 -n 20
+elif [ "x$clustertype" == "xfuzzykmeans" ]; then
+ $MAHOUT seq2sparse \
+ -i ${WORK_DIR}/reuters-out-seqdir/ \
+ -o ${WORK_DIR}/reuters-out-seqdir-sparse-fkmeans \
+ && \
+ $MAHOUT fkmeans \
+ -i ${WORK_DIR}/reuters-out-seqdir-sparse-fkmeans/tfidf-vectors/ \
+ -c ${WORK_DIR}/reuters-fkmeans-clusters \
+ -o ${WORK_DIR}/reuters-fkmeans \
+ -dm org.apache.mahout.common.distance.CosineDistanceMeasure \
+ -x 10 -k 20 -ow -m 1.1 \
+ && \
+ $MAHOUT clusterdump \
+ -s ${WORK_DIR}/reuters-fkmeans/clusters-*-final \
+ -d ${WORK_DIR}/reuters-out-seqdir-sparse-fkmeans/dictionary.file-0 \
+ -dt sequencefile -b 100 -n 20
elif [ "x$clustertype" == "xlda" ]; then
$MAHOUT seq2sparse \
-i ${WORK_DIR}/reuters-out-seqdir/ \
@@ -117,6 +135,19 @@ elif [ "x$clustertype" == "xlda" ]; then
-i ${WORK_DIR}/reuters-lda/state-20 \
-d ${WORK_DIR}/reuters-out-seqdir-sparse-lda/dictionary.file-0 \
-dt sequencefile
+elif [ "x$clustertype" == "xdirichlet" ]; then
+ $MAHOUT seq2sparse \
+ -i ${WORK_DIR}/reuters-out-seqdir/ \
+ -o ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet \
+ && \
+ $MAHOUT dirichlet \
+ -i ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/tfidf-vectors \
+ -o ${WORK_DIR}/reuters-dirichlet -k 20 -ow -x 20 \
+ && \
+ $MAHOUT clusterdump \
+ -s ${WORK_DIR}/reuters-dirichlet/clusters-*-final \
+ -d ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/dictionary.file-0 \
+ -dt sequencefile -b 100 -n 20
else
echo "unknown cluster type: $clustertype";
fi