You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/06/06 15:28:27 UTC
svn commit: r1346882 - in /mahout/trunk: bin/mahout examples/bin/asf-email-examples.sh
Author: robinanil
Date: Wed Jun 6 13:28:26 2012
New Revision: 1346882
URL: http://svn.apache.org/viewvc?rev=1346882&view=rev
Log:
Improve performance for local jobs: increased split and sort sizes, reduced the number of jobs, and increased the JVM heap size
Modified:
mahout/trunk/bin/mahout
mahout/trunk/examples/bin/asf-email-examples.sh
Modified: mahout/trunk/bin/mahout
URL: http://svn.apache.org/viewvc/mahout/trunk/bin/mahout?rev=1346882&r1=1346881&r2=1346882&view=diff
==============================================================================
--- mahout/trunk/bin/mahout (original)
+++ mahout/trunk/bin/mahout Wed Jun 6 13:28:26 2012
@@ -184,7 +184,16 @@ fi
MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.dir=$MAHOUT_LOG_DIR"
MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.file=$MAHOUT_LOGFILE"
-MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.min.split.size=256MB"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.min.split.size=512MB"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.map.child.java.opts=-Xmx4096m"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.reduce.child.java.opts=-Xmx4096m"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.output.compress=true"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.compress.map.output=true"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.map.tasks=1"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dmapred.reduce.tasks=1"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.sort.factor=30"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.sort.mb=1024"
+MAHOUT_OPTS="$MAHOUT_OPTS -Dio.file.buffer.size=32786"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
MAHOUT_OPTS="$MAHOUT_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
Modified: mahout/trunk/examples/bin/asf-email-examples.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/asf-email-examples.sh?rev=1346882&r1=1346881&r2=1346882&view=diff
==============================================================================
--- mahout/trunk/examples/bin/asf-email-examples.sh (original)
+++ mahout/trunk/examples/bin/asf-email-examples.sh Wed Jun 6 13:28:26 2012
@@ -31,7 +31,7 @@ function fileExists() {
}
function removeFolder() {
- if [ "$MAHOUT_LOCAL" != "" ]; then
+ if [ "$MAHOUT_LOCAL" == "" ]; then
rm -rf $1
else
if fileExists "$1"; then
@@ -188,7 +188,7 @@ elif [ "x$alg" == "xclassification" ]; t
LABEL="$SPLIT/labels"
if ! fileExists "$MAIL_OUT/chunk-0"; then
echo "Converting Mail files to Sequence Files"
- $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT -chunk 128
+ $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT -chunk 768 --stripQuoted
fi
if ! fileExists "$SEQ2SP/dictionary.file-0"; then
echo "Converting the files to sparse vectors"
@@ -227,7 +227,7 @@ elif [ "x$alg" == "xclassification" ]; t
LABEL="$SPLIT/labels"
if ! fileExists "$MAIL_OUT/chunk-0"; then
echo "Converting Mail files to Sequence Files"
- $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT --stripQuoted
+ $MAHOUT org.apache.mahout.text.SequenceFilesFromMailArchives --charset "UTF-8" --subject --body --input $ASF_ARCHIVES --output $MAIL_OUT --stripQuoted -chunk 768
fi
echo "Converting the files to sparse vectors in $SEQ2SP"
if ! fileExists "$SEQ2SP/part-m-00000"; then