You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/04 18:40:20 UTC

svn commit: r1346031 - /mahout/trunk/examples/bin/asf-email-examples.sh

Author: robinanil
Date: Mon Jun  4 16:40:20 2012
New Revision: 1346031

URL: http://svn.apache.org/viewvc?rev=1346031&view=rev
Log:
MAHOUT-1006: Fixes to run the ASF classification examples on Naive Bayes using the encoder

Modified:
    mahout/trunk/examples/bin/asf-email-examples.sh

Modified: mahout/trunk/examples/bin/asf-email-examples.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/asf-email-examples.sh?rev=1346031&r1=1346030&r2=1346031&view=diff
==============================================================================
--- mahout/trunk/examples/bin/asf-email-examples.sh (original)
+++ mahout/trunk/examples/bin/asf-email-examples.sh Mon Jun  4 16:40:20 2012
@@ -196,12 +196,13 @@ elif [ "x$alg" == "xclassification" ]; t
     fi
     if ! fileExists "$SEQ2SP/dictionary.file-0"; then
       echo "Converting the files to sparse vectors"
-      $MAHOUT seq2sparse --input $MAIL_OUT --output $SEQ2SP --norm 2 --weight TFIDF --namedVector -lnorm --maxDFPercent 90 --minSupport 2 --analyzerName org.apache.mahout.text.MailArchivesClusteringAnalyzer -chunk 1000
-    fi
+      # $MAHOUT seq2sparse --input $MAIL_OUT --output $SEQ2SP --norm 2 --weight TFIDF --namedVector -lnorm --maxDFPercent 90 --minSupport 2 --analyzerName org.apache.mahout.text.MailArchivesClusteringAnalyzer -chunk 1000
+      $MAHOUT seq2encoded --input $MAIL_OUT --output $SEQ2SP --analyzerName org.apache.mahout.text.MailArchivesClusteringAnalyzer --cardinality 100000 -ow
+	fi
     if ! fileExists "$TRAIN/part-m-00000"; then
       #setup train/test files
       echo "Creating training and test inputs"
-      $MAHOUT split --input $SEQ2SP/tfidf-vectors --trainingOutput $TRAIN --testOutput $TEST --randomSelectionPct 20 --overwrite --sequenceFiles -xm sequential
+      $MAHOUT split --input $SEQ2SP --trainingOutput $TRAIN --testOutput $TEST --randomSelectionPct 20 --overwrite --sequenceFiles -xm sequential
     fi
     MODEL="$CLASS/model"
     if [ "x$classAlg" == "xstandard" ]; then