You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/05/11 10:58:56 UTC

svn commit: r1337068 - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/test/java/org/apache/mahout/classifier/bayes/ examples/bin/ examples/src/main/java/org/apache/maho...

Author: ssc
Date: Fri May 11 08:58:55 2012
New Revision: 1337068

URL: http://svn.apache.org/viewvc?rev=1337068&view=rev
Log:
MAHOUT-1010 Remove the old naive bayes implementation (org.apache.mahout.classifier.bayes) from the codebase

Removed:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/
    mahout/trunk/examples/src/main/java/org/apache/mahout/text/
    mahout/trunk/src/conf/pagerank.props
    mahout/trunk/src/conf/prepare20newsgroups.props
    mahout/trunk/src/conf/randomwalkwithrestart.props
    mahout/trunk/src/conf/testclassifier.props
    mahout/trunk/src/conf/trainclassifier.props
Modified:
    mahout/trunk/examples/bin/classify-20newsgroups.sh
    mahout/trunk/src/conf/driver.classes.props

Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1337068&r1=1337067&r2=1337068&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Fri May 11 08:58:55 2012
@@ -23,7 +23,7 @@
 #  examples/bin/build-20news.sh
 
 if [ "$1" = "--help" ] || [ "$1" = "--?" ]; then
-  echo "This script runs SGD and Bayes classifiers over the classic 20 News Groups."
+  echo "This script runs the SGD classifier over the classic 20 News Groups."
   exit
 fi
 
@@ -34,14 +34,13 @@ fi
 START_PATH=`pwd`
 
 WORK_DIR=/tmp/mahout-work-${USER}
-algorithm=( naivebayes sgd clean)
+algorithm=( sgd clean)
 if [ -n "$1" ]; then
   choice=$1
 else
   echo "Please select a number to choose the corresponding task to run"
   echo "1. ${algorithm[0]}"
-  echo "2. ${algorithm[1]}"
-  echo "3. ${algorithm[2]} -- cleans up the work area in $WORK_DIR"
+  echo "2. ${algorithm[1]} -- cleans up the work area in $WORK_DIR"
   read -p "Enter your choice : " choice
 fi
 
@@ -68,62 +67,7 @@ cd ../..
 
 set -e
 
-if [ "x$alg" == "xnaivebayes" ]; then
-  echo "Preparing Training Data"
-  ./bin/mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
-    -p ${WORK_DIR}/20news-bydate/20news-bydate-train \
-    -o ${WORK_DIR}/20news-bydate/bayes-train-input \
-    -a org.apache.mahout.vectorizer.DefaultAnalyzer \
-    -c UTF-8
-
-  echo "Preparing Test Data"
-
-  ./bin/mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
-    -p ${WORK_DIR}/20news-bydate/20news-bydate-test \
-    -o ${WORK_DIR}/20news-bydate/bayes-test-input \
-    -a org.apache.mahout.vectorizer.DefaultAnalyzer \
-    -c UTF-8
-
-  TEST_METHOD="sequential"
-
-  # if we're set up to run on a cluster..
-  if [ "$HADOOP_HOME" != "" ]; then
-      # mapreduce test method used on hadoop
-      TEST_METHOD="mapreduce"
-
-      set +e
-      hadoop dfs -rmr \
-        ${WORK_DIR}/20news-bydate/bayes-train-input
-
-      hadoop dfs -rmr \
-        ${WORK_DIR}/20news-bydate/bayes-test-input
-
-      set -e
-      hadoop dfs -put \
-        ${WORK_DIR}/20news-bydate/bayes-train-input \
-        ${WORK_DIR}/20news-bydate/bayes-train-input
-
-      hadoop dfs -put \
-        ${WORK_DIR}/20news-bydate/bayes-test-input \
-        ${WORK_DIR}/20news-bydate/bayes-test-input
-  fi
-
-
-  ./bin/mahout trainclassifier \
-    -i ${WORK_DIR}/20news-bydate/bayes-train-input \
-    -o ${WORK_DIR}/20news-bydate/bayes-model \
-    -type bayes \
-    -ng 1 \
-    -source hdfs
-
-  ./bin/mahout testclassifier \
-    -m ${WORK_DIR}/20news-bydate/bayes-model \
-    -d ${WORK_DIR}/20news-bydate/bayes-test-input \
-    -type bayes \
-    -ng 1 \
-    -source hdfs \
-    -method ${TEST_METHOD}
-elif [ "x$alg" == "xsgd" ]; then
+if [ "x$alg" == "xsgd" ]; then
   if [ ! -e "/tmp/news-group.model" ]; then
     echo "Training on ${WORK_DIR}/20news-bydate/20news-bydate-train/"
     ./bin/mahout org.apache.mahout.classifier.sgd.TrainNewsGroups ${WORK_DIR}/20news-bydate/20news-bydate-train/

Modified: mahout/trunk/src/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.props?rev=1337068&r1=1337067&r2=1337068&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.props (original)
+++ mahout/trunk/src/conf/driver.classes.props Fri May 11 08:58:55 2012
@@ -39,12 +39,6 @@ org.apache.mahout.clustering.topdown.pos
 #Freq. Itemset Mining
 org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
 #Classification
-#old bayes
-org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups = prepare20newsgroups : Reformat 20 newsgroups data
-org.apache.mahout.classifier.bayes.WikipediaXmlSplitter = wikipediaXMLSplitter : Reads wikipedia data and creates ch
-org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver = wikipediaDataSetCreator : Splits data set of wikipedia wrt feature like country
-org.apache.mahout.classifier.bayes.TestClassifier = testclassifier : Test the text based Bayes Classifier
-org.apache.mahout.classifier.bayes.TrainClassifier = trainclassifier : Train the text based Bayes Classifier
 #new bayes
 org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier
 org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier
@@ -68,8 +62,4 @@ org.apache.mahout.cf.taste.hadoop.als.Fa
 org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob = itemsimilarity : Compute the item-item-similarities for item-based collaborative filtering
 org.apache.mahout.cf.taste.hadoop.item.RecommenderJob = recommenditembased : Compute recommendations using item-based collaborative filtering
 org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob = parallelALS : ALS-WR factorization of a rating matrix
-org.apache.mahout.cf.taste.hadoop.als.RecommenderJob = recommendfactorized : Compute recommendations using the factorization of a rating matrix
-
-#Link Analysis
-org.apache.mahout.graph.linkanalysis.PageRankJob = pagerank : compute the PageRank of a graph
-org.apache.mahout.graph.linkanalysis.RandomWalkWithRestartJob = randomwalkwithrestart : compute all other vertices' proximity to a source vertex in a graph
+org.apache.mahout.cf.taste.hadoop.als.RecommenderJob = recommendfactorized : Compute recommendations using the factorization of a rating matrix
\ No newline at end of file