You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/12/20 09:44:26 UTC

svn commit: r1552538 - in /mahout/trunk: CHANGELOG examples/bin/classify-20newsgroups.sh

Author: smarthi
Date: Fri Dec 20 08:44:25 2013
New Revision: 1552538

URL: http://svn.apache.org/r1552538
Log:
MAHOUT-1384: Executing the MR version of Naive Bayes/CNB of classify-20newsgroups.sh fails in the seqdirectory step.

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/examples/bin/classify-20newsgroups.sh

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1552538&r1=1552537&r2=1552538&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri Dec 20 08:44:25 2013
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.9 - unreleased
 
+  MAHOUT-1384: Executing the MR version of Naive Bayes/CNB of classify_20newgroups.sh fails in seqdirectory step (smarthi)
+
   MAHOUT-1382: Upgrade Mahout third party jars for 0.9 Release (smarthi)
 
   MAHOUT-1380: Streaming KMeans fails when executed in Sequential Mode (smarthi)

Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1552538&r1=1552537&r2=1552538&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Fri Dec 20 08:44:25 2013
@@ -33,6 +33,14 @@ if [ "$0" != "$SCRIPT_PATH" ] && [ "$SCR
 fi
 START_PATH=`pwd`
 
+if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
+  HADOOP="$HADOOP_HOME/bin/hadoop"
+  if [ ! -e $HADOOP ]; then
+    echo "Can't find hadoop in $HADOOP, exiting"
+    exit 1
+  fi
+fi
+
 WORK_DIR=/tmp/mahout-work-${USER}
 algorithm=( cnaivebayes naivebayes sgd clean)
 if [ -n "$1" ]; then
@@ -84,10 +92,18 @@ if [ "x$alg" == "xnaivebayes"  -o  "x$al
   mkdir ${WORK_DIR}/20news-all
   cp -R ${WORK_DIR}/20news-bydate/*/* ${WORK_DIR}/20news-all
 
+  if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
+    echo "Copying 20newsgroups data to HDFS"
+    set +e
+    $HADOOP dfs -rmr ${WORK_DIR}/20news-all
+    set -e
+    $HADOOP dfs -put ${WORK_DIR}/20news-all ${WORK_DIR}/20news-all
+  fi
+
   echo "Creating sequence files from 20newsgroups data"
   ./bin/mahout seqdirectory \
     -i ${WORK_DIR}/20news-all \
-    -o ${WORK_DIR}/20news-seq -ow -xm sequential
+    -o ${WORK_DIR}/20news-seq -ow
 
   echo "Converting sequence files to vectors"
   ./bin/mahout seq2sparse \