You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dr...@apache.org on 2010/05/31 04:04:01 UTC
svn commit: r949649 - in /mahout/trunk: examples/bin/build-reuters.sh
utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
Author: drew
Date: Mon May 31 02:04:01 2010
New Revision: 949649
URL: http://svn.apache.org/viewvc?rev=949649&view=rev
Log:
MAHOUT-398: eliminated separate tfidf directory for tfidf vector output.
Modified:
mahout/trunk/examples/bin/build-reuters.sh
mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Mon May 31 02:04:01 2010
@@ -41,9 +41,9 @@ cd ../..
./bin/mahout seqdirectory -i ./examples/bin/work/reuters-out/ -o ./examples/bin/work/reuters-out-seqdir -c UTF-8 -chunk 5
# to use k-Means clustering, uncomment the next three lines
-#./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse
-#./bin/mahout kmeans -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf/tfidf-vectors/ -c ./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20 -ow
-#./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile -b 100 -n 20
+./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse
+./bin/mahout kmeans -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/ -c ./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20 -ow
+./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile -b 100 -n 20
# to use LDA clustering, uncomment the next three lines
#./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse -wt tf -seq -nr 3
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java Mon May 31 02:04:01 2010
@@ -223,7 +223,7 @@ public final class SparseVectorsFromSequ
if (processIdf) {
TFIDFConverter.processTfIdf(
new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
- new Path(outputDir, TFIDFConverter.TFIDF_OUTPUT_FOLDER), chunkSize, minDf, maxDFPercent, norm,
+ outputDir, chunkSize, minDf, maxDFPercent, norm,
sequentialAccessOutput, reduceTasks);
}
} catch (OptionException e) {