You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/12/20 09:44:26 UTC
svn commit: r1552538 - in /mahout/trunk: CHANGELOG
examples/bin/classify-20newsgroups.sh
Author: smarthi
Date: Fri Dec 20 08:44:25 2013
New Revision: 1552538
URL: http://svn.apache.org/r1552538
Log:
MAHOUT-1384: Executing the MR version of Naive Bayes/CNB of classify-20newsgroups.sh fails in seqdirectory step.
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/examples/bin/classify-20newsgroups.sh
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1552538&r1=1552537&r2=1552538&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri Dec 20 08:44:25 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.9 - unreleased
+ MAHOUT-1384: Executing the MR version of Naive Bayes/CNB of classify_20newgroups.sh fails in seqdirectory step (smarthi)
+
MAHOUT-1382: Upgrade Mahout third party jars for 0.9 Release (smarthi)
MAHOUT-1380: Streaming KMeans fails when executed in Sequential Mode (smarthi)
Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1552538&r1=1552537&r2=1552538&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Fri Dec 20 08:44:25 2013
@@ -33,6 +33,14 @@ if [ "$0" != "$SCRIPT_PATH" ] && [ "$SCR
fi
START_PATH=`pwd`
+if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
+ HADOOP="$HADOOP_HOME/bin/hadoop"
+ if [ ! -e $HADOOP ]; then
+ echo "Can't find hadoop in $HADOOP, exiting"
+ exit 1
+ fi
+fi
+
WORK_DIR=/tmp/mahout-work-${USER}
algorithm=( cnaivebayes naivebayes sgd clean)
if [ -n "$1" ]; then
@@ -84,10 +92,18 @@ if [ "x$alg" == "xnaivebayes" -o "x$al
mkdir ${WORK_DIR}/20news-all
cp -R ${WORK_DIR}/20news-bydate/*/* ${WORK_DIR}/20news-all
+ if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
+ echo "Copying 20newsgroups data to HDFS"
+ set +e
+ $HADOOP dfs -rmr ${WORK_DIR}/20news-all
+ set -e
+ $HADOOP dfs -put ${WORK_DIR}/20news-all ${WORK_DIR}/20news-all
+ fi
+
echo "Creating sequence files from 20newsgroups data"
./bin/mahout seqdirectory \
-i ${WORK_DIR}/20news-all \
- -o ${WORK_DIR}/20news-seq -ow -xm sequential
+ -o ${WORK_DIR}/20news-seq -ow
echo "Converting sequence files to vectors"
./bin/mahout seq2sparse \