You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/05/11 10:58:56 UTC
svn commit: r1337068 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/mahout/classifier/bayes/
core/src/test/java/org/apache/mahout/classifier/bayes/ examples/bin/
examples/src/main/java/org/apache/mahout/classifier/bayes/
examples/src/main/java/org/apache/mahout/text/ src/conf/
Author: ssc
Date: Fri May 11 08:58:55 2012
New Revision: 1337068
URL: http://svn.apache.org/viewvc?rev=1337068&view=rev
Log:
MAHOUT-1010 Remove the old naive bayes implementation (org.apache.mahout.classifier.bayes) from the codebase
Removed:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/
mahout/trunk/examples/src/main/java/org/apache/mahout/text/
mahout/trunk/src/conf/pagerank.props
mahout/trunk/src/conf/prepare20newsgroups.props
mahout/trunk/src/conf/randomwalkwithrestart.props
mahout/trunk/src/conf/testclassifier.props
mahout/trunk/src/conf/trainclassifier.props
Modified:
mahout/trunk/examples/bin/classify-20newsgroups.sh
mahout/trunk/src/conf/driver.classes.props
Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1337068&r1=1337067&r2=1337068&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Fri May 11 08:58:55 2012
@@ -23,7 +23,7 @@
# examples/bin/build-20news.sh
if [ "$1" = "--help" ] || [ "$1" = "--?" ]; then
- echo "This script runs SGD and Bayes classifiers over the classic 20 News Groups."
+ echo "This script runs the SGD classifier over the classic 20 News Groups."
exit
fi
@@ -34,14 +34,13 @@ fi
START_PATH=`pwd`
WORK_DIR=/tmp/mahout-work-${USER}
-algorithm=( naivebayes sgd clean)
+algorithm=( sgd clean)
if [ -n "$1" ]; then
choice=$1
else
echo "Please select a number to choose the corresponding task to run"
echo "1. ${algorithm[0]}"
- echo "2. ${algorithm[1]}"
- echo "3. ${algorithm[2]} -- cleans up the work area in $WORK_DIR"
+ echo "2. ${algorithm[1]} -- cleans up the work area in $WORK_DIR"
read -p "Enter your choice : " choice
fi
@@ -68,62 +67,7 @@ cd ../..
set -e
-if [ "x$alg" == "xnaivebayes" ]; then
- echo "Preparing Training Data"
- ./bin/mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
- -p ${WORK_DIR}/20news-bydate/20news-bydate-train \
- -o ${WORK_DIR}/20news-bydate/bayes-train-input \
- -a org.apache.mahout.vectorizer.DefaultAnalyzer \
- -c UTF-8
-
- echo "Preparing Test Data"
-
- ./bin/mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
- -p ${WORK_DIR}/20news-bydate/20news-bydate-test \
- -o ${WORK_DIR}/20news-bydate/bayes-test-input \
- -a org.apache.mahout.vectorizer.DefaultAnalyzer \
- -c UTF-8
-
- TEST_METHOD="sequential"
-
- # if we're set up to run on a cluster..
- if [ "$HADOOP_HOME" != "" ]; then
- # mapreduce test method used on hadoop
- TEST_METHOD="mapreduce"
-
- set +e
- hadoop dfs -rmr \
- ${WORK_DIR}/20news-bydate/bayes-train-input
-
- hadoop dfs -rmr \
- ${WORK_DIR}/20news-bydate/bayes-test-input
-
- set -e
- hadoop dfs -put \
- ${WORK_DIR}/20news-bydate/bayes-train-input \
- ${WORK_DIR}/20news-bydate/bayes-train-input
-
- hadoop dfs -put \
- ${WORK_DIR}/20news-bydate/bayes-test-input \
- ${WORK_DIR}/20news-bydate/bayes-test-input
- fi
-
-
- ./bin/mahout trainclassifier \
- -i ${WORK_DIR}/20news-bydate/bayes-train-input \
- -o ${WORK_DIR}/20news-bydate/bayes-model \
- -type bayes \
- -ng 1 \
- -source hdfs
-
- ./bin/mahout testclassifier \
- -m ${WORK_DIR}/20news-bydate/bayes-model \
- -d ${WORK_DIR}/20news-bydate/bayes-test-input \
- -type bayes \
- -ng 1 \
- -source hdfs \
- -method ${TEST_METHOD}
-elif [ "x$alg" == "xsgd" ]; then
+if [ "x$alg" == "xsgd" ]; then
if [ ! -e "/tmp/news-group.model" ]; then
echo "Training on ${WORK_DIR}/20news-bydate/20news-bydate-train/"
./bin/mahout org.apache.mahout.classifier.sgd.TrainNewsGroups ${WORK_DIR}/20news-bydate/20news-bydate-train/
Modified: mahout/trunk/src/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.props?rev=1337068&r1=1337067&r2=1337068&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.props (original)
+++ mahout/trunk/src/conf/driver.classes.props Fri May 11 08:58:55 2012
@@ -39,12 +39,6 @@ org.apache.mahout.clustering.topdown.pos
#Freq. Itemset Mining
org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
#Classification
-#old bayes
-org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups = prepare20newsgroups : Reformat 20 newsgroups data
-org.apache.mahout.classifier.bayes.WikipediaXmlSplitter = wikipediaXMLSplitter : Reads wikipedia data and creates ch
-org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver = wikipediaDataSetCreator : Splits data set of wikipedia wrt feature like country
-org.apache.mahout.classifier.bayes.TestClassifier = testclassifier : Test the text based Bayes Classifier
-org.apache.mahout.classifier.bayes.TrainClassifier = trainclassifier : Train the text based Bayes Classifier
#new bayes
org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier
org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier
@@ -68,8 +62,4 @@ org.apache.mahout.cf.taste.hadoop.als.Fa
org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob = itemsimilarity : Compute the item-item-similarities for item-based collaborative filtering
org.apache.mahout.cf.taste.hadoop.item.RecommenderJob = recommenditembased : Compute recommendations using item-based collaborative filtering
org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob = parallelALS : ALS-WR factorization of a rating matrix
-org.apache.mahout.cf.taste.hadoop.als.RecommenderJob = recommendfactorized : Compute recommendations using the factorization of a rating matrix
-
-#Link Analysis
-org.apache.mahout.graph.linkanalysis.PageRankJob = pagerank : compute the PageRank of a graph
-org.apache.mahout.graph.linkanalysis.RandomWalkWithRestartJob = randomwalkwithrestart : compute all other vertices' proximity to a source vertex in a graph
+org.apache.mahout.cf.taste.hadoop.als.RecommenderJob = recommendfactorized : Compute recommendations using the factorization of a rating matrix
\ No newline at end of file