You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/06 15:28:27 UTC

svn commit: r1346882 - in /mahout/trunk: bin/mahout examples/bin/asf-email-examples.sh

Author: robinanil
Date: Wed Jun  6 13:28:26 2012
New Revision: 1346882

URL: http://svn.apache.org/viewvc?rev=1346882&view=rev
Log:
Improving performance for local jobs: increased split and sort sizes, reduced the number of jobs, and increased the JVM heap size

Modified:
    mahout/trunk/bin/mahout
    mahout/trunk/examples/bin/asf-email-examples.sh

Modified: mahout/trunk/bin/mahout
URL: http://svn.apache.org/viewvc/mahout/trunk/bin/mahout?rev=1346882&r1=1346881&r2=1346882&view=diff
==============================================================================
--- mahout/trunk/bin/mahout (original)
+++ mahout/trunk/bin/mahout Wed Jun  6 13:28:26 2012
@@ -184,7 +184,16 @@ fi
 
 MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.dir=$MAHOUT_LOG_DIR"
 MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.file=$MAHOUT_LOGFILE"
-MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.min.split.size=256MB"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.min.split.size=512MB"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.map.child.java.opts=-Xmx4096m"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.reduce.child.java.opts=-Xmx4096m"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.output.compress=true"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.compress.map.output=true"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.map.tasks=1"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.reduce.tasks=1"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.sort.factor=30"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.sort.mb=1024"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.file.buffer.size=32786"
 
 if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
   MAHOUT_OPTS="$MAHOUT_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"

Modified: mahout/trunk/examples/bin/asf-email-examples.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/asf-email-examples.sh?rev=1346882&r1=1346881&r2=1346882&view=diff
==============================================================================
--- mahout/trunk/examples/bin/asf-email-examples.sh (original)
+++ mahout/trunk/examples/bin/asf-email-examples.sh Wed Jun  6 13:28:26 2012
@@ -31,7 +31,7 @@ function fileExists() {
 }
 
 function removeFolder() {
-  if [ "$MAHOUT_LOCAL" != "" ]; then
+  if [ "$MAHOUT_LOCAL" == "" ]; then
     rm -rf $1
   else
     if fileExists "$1"; then
@@ -188,7 +188,7 @@ elif [ "x$alg" == "xclassification" ]; t
     LABEL="$SPLIT/labels"
     if ! fileExists "$MAIL_OUT/chunk-0"; then
       echo "Converting Mail files to Sequence Files"
-      $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT -chunk 128
+      $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT -chunk 768 --stripQuoted
     fi
     if ! fileExists "$SEQ2SP/dictionary.file-0"; then
       echo "Converting the files to sparse vectors"
@@ -227,7 +227,7 @@ elif [ "x$alg" == "xclassification" ]; t
     LABEL="$SPLIT/labels"
     if ! fileExists "$MAIL_OUT/chunk-0"; then
       echo "Converting Mail files to Sequence Files"
-      $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT --stripQuoted
+      $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT --stripQuoted -chunk 768
     fi
     echo "Converting the files to sparse vectors in $SEQ2SP"
     if ! fileExists "$SEQ2SP/part-m-00000"; then