You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2011/10/10 18:36:35 UTC

svn commit: r1181061 - /mahout/trunk/examples/bin/build-asf-email.sh

Author: gsingers
Date: Mon Oct 10 16:36:35 2011
New Revision: 1181061

URL: http://svn.apache.org/viewvc?rev=1181061&view=rev
Log:
MAHOUT-798: restrict the number of items per label to avoid overtraining

Modified:
    mahout/trunk/examples/bin/build-asf-email.sh

Modified: mahout/trunk/examples/bin/build-asf-email.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-asf-email.sh?rev=1181061&r1=1181060&r2=1181061&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-asf-email.sh (original)
+++ mahout/trunk/examples/bin/build-asf-email.sh Mon Oct 10 16:36:35 2011
@@ -126,7 +126,7 @@ elif [ "x$alg" == "xclassification" ]; t
     $MAHOUT seq2sparse --input $MAIL_OUT --output $SEQ2SP --norm 2 --weight TFIDF --namedVector --maxDFPercent 90 --minSupport 2 --analyzerName org.apache.mahout.text.MailArchivesClusteringAnalyzer
     #We need to modify the vectors to have a better label
     echo "Converting vector labels"
-    $MAHOUT org.apache.mahout.classifier.email.PrepEmailVectorsDriver --input "$SEQ2SP/tfidf-vectors" --output $SEQ2SPLABEL --overwrite
+    $MAHOUT org.apache.mahout.classifier.email.PrepEmailVectorsDriver --input "$SEQ2SP/tfidf-vectors" --output $SEQ2SPLABEL --overwrite --maxItemsPerLabel 1000
   fi
   if [ "x$OVER" == "xover" ] || [ ! -e "$TRAIN/part-m-00000" ]; then
     #setup train/test files