You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/11/28 00:27:11 UTC
svn commit: r1546232 - /mahout/trunk/examples/bin/cluster-reuters.sh
Author: smarthi
Date: Wed Nov 27 23:27:11 2013
New Revision: 1546232
URL: http://svn.apache.org/r1546232
Log:
MAHOUT-1347: Add Streaming K-Means clustering algorithm to examples/bin/cluster-reuters.sh
Modified:
mahout/trunk/examples/bin/cluster-reuters.sh
Modified: mahout/trunk/examples/bin/cluster-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/cluster-reuters.sh?rev=1546232&r1=1546231&r2=1546232&view=diff
==============================================================================
--- mahout/trunk/examples/bin/cluster-reuters.sh (original)
+++ mahout/trunk/examples/bin/cluster-reuters.sh Wed Nov 27 23:27:11 2013
@@ -39,14 +39,15 @@ if [ ! -e $MAHOUT ]; then
exit 1
fi
-algorithm=( kmeans fuzzykmeans lda)
+algorithm=( kmeans fuzzykmeans lda streamingkmeans)
if [ -n "$1" ]; then
choice=$1
else
echo "Please select a number to choose the corresponding clustering algorithm"
echo "1. ${algorithm[0]} clustering"
echo "2. ${algorithm[1]} clustering"
- echo "3. ${algorithm[2]} clustering"
+ echo "3. ${algorithm[2]} clustering"
+ echo "4. ${algorithm[3]} clustering"
read -p "Enter your choice : " choice
fi
@@ -169,6 +170,23 @@ elif [ "x$clustertype" == "xlda" ]; then
-dt sequencefile -sort ${WORK_DIR}/reuters-lda-topics/part-m-00000 \
&& \
cat ${WORK_DIR}/reuters-lda/vectordump
+elif [ "x$clustertype" == "xstreamingkmeans" ]; then
+ $MAHOUT seq2sparse \
+ -i ${WORK_DIR}/reuters-out-seqdir/ \
+ -o ${WORK_DIR}/reuters-out-seqdir-sparse-streamingkmeans -ow --maxDFPercent 85 --namedVector \
+ && \
+ rm -rf ${WORK_DIR}/reuters-streamingkmeans \
+ && \
+ $MAHOUT streamingkmeans \
+ -i ${WORK_DIR}/reuters-out-seqdir-sparse-streamingkmeans/tfidf-vectors/ \
+ --tempDir ${WORK_DIR}/tmp \
+ -o ${WORK_DIR}/reuters-streamingkmeans \
+ -sc org.apache.mahout.math.neighborhood.FastProjectionSearch \
+ -dm org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure \
+ -k 20 -km 200 -rskm \
+ && \
+ $MAHOUT seqdumper \
+ -i ${WORK_DIR}/reuters-streamingkmeans
else
echo "unknown cluster type: $clustertype"
fi