You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/07/30 01:36:53 UTC
svn commit: r1508241 - /mahout/trunk/examples/bin/cluster-reuters.sh
Author: smarthi
Date: Mon Jul 29 23:36:53 2013
New Revision: 1508241
URL: http://svn.apache.org/r1508241
Log:
MAHOUT-1296: Removed deprecated algorithms; removed references to MinHash clustering from cluster-reuters.sh
Modified:
mahout/trunk/examples/bin/cluster-reuters.sh
Modified: mahout/trunk/examples/bin/cluster-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/cluster-reuters.sh?rev=1508241&r1=1508240&r2=1508241&view=diff
==============================================================================
--- mahout/trunk/examples/bin/cluster-reuters.sh (original)
+++ mahout/trunk/examples/bin/cluster-reuters.sh Mon Jul 29 23:36:53 2013
@@ -39,7 +39,7 @@ if [ ! -e $MAHOUT ]; then
exit 1
fi
-algorithm=( kmeans fuzzykmeans dirichlet lda minhash)
+algorithm=( kmeans fuzzykmeans dirichlet lda)
if [ -n "$1" ]; then
choice=$1
else
@@ -48,7 +48,6 @@ else
echo "2. ${algorithm[1]} clustering"
echo "3. ${algorithm[2]} clustering"
echo "4. ${algorithm[3]} clustering"
- echo "5. ${algorithm[4]} clustering"
read -p "Enter your choice : " choice
fi
@@ -103,7 +102,7 @@ if [ ! -e ${WORK_DIR}/reuters-out-seqdir
fi
fi
echo "Converting to Sequence Files from Directory"
- $MAHOUT seqdirectory -i ${WORK_DIR}/reuters-out -o ${WORK_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5
+ $MAHOUT seqdirectory -i ${WORK_DIR}/reuters-out -o ${WORK_DIR}/reuters-out-seqdir -c UTF-8 -chunk 64 -xm sequential
fi
if [ "x$clustertype" == "xkmeans" ]; then
@@ -190,14 +189,6 @@ elif [ "x$clustertype" == "xlda" ]; then
-dt sequencefile -sort ${WORK_DIR}/reuters-lda-topics/part-m-00000 \
&& \
cat ${WORK_DIR}/reuters-lda/vectordump
-elif [ "x$clustertype" == "xminhash" ]; then
- $MAHOUT seq2sparse \
- -i ${WORK_DIR}/reuters-out-seqdir/ \
- -o ${WORK_DIR}/reuters-out-seqdir-sparse-minhash --maxDFPercent 85 --namedVector \
- && \
- $MAHOUT org.apache.mahout.clustering.minhash.MinHashDriver \
- -i ${WORK_DIR}/reuters-out-seqdir-sparse-minhash/tfidf-vectors \
- -o ${WORK_DIR}/reuters-minhash --overwrite
else
echo "unknown cluster type: $clustertype"
fi