You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/10/15 17:35:51 UTC
svn commit: r1183661 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/m...
Author: srowen
Date: Sat Oct 15 15:35:50 2011
New Revision: 1183661
URL: http://svn.apache.org/viewvc?rev=1183661&view=rev
Log:
MAHOUT-842 tests pass, no objections heard. If it creates any breakage in scripts, it will be minor and self-explanatory, and it is better to standardize now
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Sat Oct 15 15:35:50 2011
@@ -93,7 +93,7 @@ public class ParallelALSFactorizationJob
addOutputOption();
addOption("lambda", "l", "regularization parameter", true);
addOption("numFeatures", "f", "dimension of the feature space", true);
- addOption("numIterations", "i", "number of iterations", true);
+ addOption("numIterations", null, "number of iterations", true);
Map<String,String> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java Sat Oct 15 15:35:50 2011
@@ -61,8 +61,8 @@ public class PredictionJob extends Abstr
public int run(String[] args) throws Exception {
addOption("pairs", "p", "path containing the test ratings, each line must be: userID,itemID", true);
- addOption("userFeatures", "u", "path to the user feature matrix", true);
- addOption("itemFeatures", "i", "path to the item feature matrix", true);
+ addOption("userFeatures", null, "path to the user feature matrix", true);
+ addOption("itemFeatures", null, "path to the item feature matrix", true);
addOutputOption();
Map<String,String> parsedArgs = parseArguments(args);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java Sat Oct 15 15:35:50 2011
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.hadoop.als.eval;
+import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -41,7 +42,6 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
-import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -72,8 +72,8 @@ public class InMemoryFactorizationEvalua
public int run(String[] args) throws Exception {
addOption("pairs", "p", "path containing the test ratings, each line must be userID,itemID,rating", true);
- addOption("userFeatures", "u", "path to the user feature matrix", true);
- addOption("itemFeatures", "i", "path to the item feature matrix", true);
+ addOption("userFeatures", null, "path to the user feature matrix", true);
+ addOption("itemFeatures", null, "path to the item feature matrix", true);
addOutputOption();
Map<String,String> parsedArgs = parseArguments(args);
@@ -151,7 +151,7 @@ public class InMemoryFactorizationEvalua
InputStream in = null;
try {
in = fs.open(path);
- BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8")));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charsets.UTF_8));
String line;
while ((line = reader.readLine()) != null) {
String[] tokens = TasteHadoopUtils.splitPrefTokens(line);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java Sat Oct 15 15:35:50 2011
@@ -69,8 +69,8 @@ public class ParallelFactorizationEvalua
public int run(String[] args) throws Exception {
addOption("pairs", "p", "path containing the test ratings, each line must be userID,itemID,rating", true);
- addOption("userFeatures", "u", "path to the user feature matrix", true);
- addOption("itemFeatures", "i", "path to the item feature matrix", true);
+ addOption("userFeatures", null, "path to the user feature matrix", true);
+ addOption("itemFeatures", null, "path to the item feature matrix", true);
addOutputOption();
Map<String,String> parsedArgs = parseArguments(args);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sat Oct 15 15:35:50 2011
@@ -100,8 +100,8 @@ public final class RecommenderJob extend
addOutputOption();
addOption("numRecommendations", "n", "Number of recommendations per user",
String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
- addOption("usersFile", "u", "File of users to recommend for", null);
- addOption("itemsFile", "i", "File of items to recommend for", null);
+ addOption("usersFile", null, "File of users to recommend for", null);
+ addOption("itemsFile", null, "File of items to recommend for", null);
addOption("filterFile", "f", "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
+ "the recommendations for that user (optional)", null);
addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sat Oct 15 15:35:50 2011
@@ -42,6 +42,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -263,13 +264,9 @@ public final class BayesFileFormatter {
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription("The Input file")
- .withShortName("i").create();
-
- Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription("The output file")
- .withShortName("o").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
+
+ Option outputOpt = DefaultOptionCreator.outputOption().create();
Option labelOpt = obuilder.withLongName("label").withRequired(true).withArgument(
abuilder.withName("label").withMinimum(1).withMaximum(1).create()).withDescription("The label of the file")
@@ -288,7 +285,7 @@ public final class BayesFileFormatter {
abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).withDescription(
"Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt)
.withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();
try {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Sat Oct 15 15:35:50 2011
@@ -65,15 +65,9 @@ public final class TrainClassifier {
Option helpOpt = DefaultOptionCreator.helpOption();
- Option inputDirOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Directory on HDFS containing the collapsed, properly formatted files: "
- + "One doc per line, first entry on the line is the label, rest is the evidence")
- .withShortName("i").create();
-
- Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The location of the model on the HDFS").withShortName("o").create();
+ Option inputDirOpt = DefaultOptionCreator.inputOption().create();
+
+ Option outputOpt = DefaultOptionCreator.outputOption().create();
Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false).withArgument(
abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).withDescription(
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java Sat Oct 15 15:35:50 2011
@@ -27,6 +27,7 @@ import org.apache.commons.cli2.builder.D
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import java.io.DataOutputStream;
import java.io.FileInputStream;
@@ -49,15 +50,9 @@ public final class BaumWelchTrainer {
DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
ArgumentBuilder argumentBuilder = new ArgumentBuilder();
- Option inputOption = optionBuilder.withLongName("input").
- withDescription("Text file with space-separated integers to train on").
- withShortName("i").withArgument(argumentBuilder.withMaximum(1).withMinimum(1).
- withName("path").create()).withRequired(true).create();
-
- Option outputOption = optionBuilder.withLongName("output").
- withDescription("Path trained HMM model should be serialized to").
- withShortName("o").withArgument(argumentBuilder.withMaximum(1).withMinimum(1).
- withName("path").create()).withRequired(true).create();
+ Option inputOption = DefaultOptionCreator.inputOption().create();
+
+ Option outputOption = DefaultOptionCreator.outputOption().create();
Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates").
withDescription("Number of hidden states").
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java Sat Oct 15 15:35:50 2011
@@ -27,6 +27,7 @@ import org.apache.commons.cli2.builder.D
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import java.io.DataInputStream;
import java.io.FileInputStream;
@@ -49,15 +50,9 @@ public final class ViterbiEvaluator {
DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
ArgumentBuilder argumentBuilder = new ArgumentBuilder();
- Option inputOption = optionBuilder.withLongName("input").
- withDescription("Text file with space-separated integers to segment").
- withShortName("i").withArgument(argumentBuilder.withMaximum(1).withMinimum(1).
- withName("path").create()).withRequired(true).create();
+ Option inputOption = DefaultOptionCreator.inputOption().create();
- Option outputOption = optionBuilder.withLongName("output").
- withDescription("Output file with decoded sequence of hidden states").
- withShortName("o").withArgument(argumentBuilder.withMaximum(1).withMinimum(1).
- withName("path").create()).withRequired(true).create();
+ Option outputOption = DefaultOptionCreator.outputOption().create();
Option modelOption = optionBuilder.withLongName("model").
withDescription("Path to serialized HMM model").
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Sat Oct 15 15:35:50 2011
@@ -67,9 +67,7 @@ public final class FPGrowthDriver {
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputDirOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Directory on HDFS containing the transaction files").withShortName("i").create();
+ Option inputDirOpt = DefaultOptionCreator.inputOption().create();
Option outputOpt = DefaultOptionCreator.outputOption().create();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Sat Oct 15 15:35:50 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.vectorizer.collocations.llr.LLRReducer;
import org.apache.mahout.vectorizer.common.PartialVectorMerger;
import org.apache.mahout.vectorizer.tfidf.TFIDFConverter;
@@ -55,13 +56,9 @@ public final class SparseVectorsFromSequ
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputDirOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "input dir containing the documents in sequence file format").withShortName("i").create();
-
- Option outputDirOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The output directory").withShortName("o").create();
+ Option inputDirOpt = DefaultOptionCreator.inputOption().create();
+
+ Option outputDirOpt = DefaultOptionCreator.outputOption().create();
Option minSupportOpt = obuilder.withLongName("minSupport").withArgument(
abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()).withDescription(
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Sat Oct 15 15:35:50 2011
@@ -43,6 +43,7 @@ import org.apache.hadoop.util.GenericsUt
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.analysis.WikipediaAnalyzer;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.common.HadoopUtil;
import org.slf4j.Logger;
@@ -69,13 +70,9 @@ public final class WikipediaDatasetCreat
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option dirInputPathOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The input directory path").withShortName("i").create();
-
- Option dirOutputPathOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The output directory Path").withShortName("o").create();
+ Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
+
+ Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();
Option categoriesOpt = obuilder.withLongName("categories").withRequired(true).withArgument(
abuilder.withName("categories").withMinimum(1).withMaximum(1).create()).withDescription(
@@ -88,8 +85,7 @@ public final class WikipediaDatasetCreat
Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(false).withArgument(
abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).withDescription(
"The analyzer to use, must have a no argument constructor").withShortName("a").create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
.withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(analyzerOpt).withOption(helpOpt)
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Sat Oct 15 15:35:50 2011
@@ -41,6 +41,7 @@ import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.df.DFUtils;
import org.apache.mahout.df.DecisionForest;
import org.apache.mahout.classifier.ResultAnalyzer;
@@ -79,8 +80,7 @@ public class TestForest extends Configur
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true).withArgument(
- abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("Test data path").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true).withArgument(
abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()).withDescription("Dataset path")
@@ -90,16 +90,13 @@ public class TestForest extends Configur
abuilder.withName("path").withMinimum(1).withMaximum(1).create()).
withDescription("Path to the Decision Forest").create();
- Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(false).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "Path to generated predictions file").create();
+ Option outputOpt = DefaultOptionCreator.outputOption().create();
Option analyzeOpt = obuilder.withLongName("analyze").withShortName("a").withRequired(false).create();
Option mrOpt = obuilder.withLongName("mapreduce").withShortName("mr").withRequired(false).create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt)
.withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt).withOption(helpOpt).create();
@@ -230,7 +227,7 @@ public class TestForest extends Configur
Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
for (Path path : infiles) {
- log.info("Classifying : " + path);
+ log.info("Classifying : {}", path);
Path outfile = outPath != null ? new Path(outPath, path.getName()).suffix(".out") : null;
testFile(path, outfile, converter, forest, dataset, analyzer, rng);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Sat Oct 15 15:35:50 2011
@@ -80,9 +80,7 @@ public final class CDGA {
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i").withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create())
- .withDescription("The Path for input data directory.").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
Option labelOpt = obuilder.withLongName("label").withRequired(true).withShortName("l")
.withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Sat Oct 15 15:35:50 2011
@@ -32,8 +32,6 @@ import org.apache.commons.cli2.CommandLi
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configuration;
@@ -207,13 +205,9 @@ public final class CDInfosTool {
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i").withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create())
- .withDescription("The Path for input data directory.").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java Sat Oct 15 15:35:50 2011
@@ -43,6 +43,7 @@ import org.apache.hadoop.mapreduce.lib.o
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.classifier.bayes.XmlInputFormat;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.common.HadoopUtil;
import org.slf4j.Logger;
@@ -70,13 +71,9 @@ public final class WikipediaToSequenceFi
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option dirInputPathOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The input directory path").withShortName("i").create();
-
- Option dirOutputPathOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The output directory Path").withShortName("o").create();
+ Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
+
+ Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();
Option categoriesOpt = obuilder.withLongName("categories").withArgument(
abuilder.withName("categories").withMinimum(1).withMaximum(1).create()).withDescription(
@@ -90,8 +87,7 @@ public final class WikipediaToSequenceFi
Option allOpt = obuilder.withLongName("all")
.withDescription("If set, Select all files. Default is false").withShortName("all").create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
.withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(allOpt).withOption(helpOpt)
@@ -162,9 +158,8 @@ public final class WikipediaToSequenceFi
+ "org.apache.hadoop.io.serializer.WritableSerialization");
Job job = new Job(conf);
- if (WikipediaToSequenceFile.log.isInfoEnabled()) {
- log.info("Input: " + input + " Out: " + output + " Categories: " + catFile
- + " All Files: " + all);
+ if (log.isInfoEnabled()) {
+ log.info("Input: {} Out: {} Categories: {} All Files: {}", new Object[] {input, output, catFile, all});
}
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Sat Oct 15 15:35:50 2011
@@ -35,6 +35,7 @@ import org.apache.hadoop.io.DoubleWritab
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.IntPairWritable;
import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.utils.vectors.VectorHelper;
@@ -72,18 +73,14 @@ public final class LDAPrintTopics {
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "Path to an LDA output (a state)").withShortName("i").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
Option dictOpt = obuilder.withLongName("dict").withRequired(true).withArgument(
abuilder.withName("dict").withMinimum(1).withMaximum(1).create()).withDescription(
"Dictionary to read in, in the same format as one created by "
+ "org.apache.mahout.utils.vectors.lucene.Driver").withShortName("d").create();
- Option outOpt = obuilder.withLongName("output").withRequired(false).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "Output directory to write top words").withShortName("o").create();
+ Option outOpt = DefaultOptionCreator.outputOption().create();
Option wordOpt = obuilder.withLongName("words").withRequired(false).withArgument(
abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create()).withDescription(
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Sat Oct 15 15:35:50 2011
@@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.utils.email.MailProcessor;
import org.apache.mahout.utils.email.MailOptions;
@@ -57,18 +58,19 @@ public final class SequenceFilesFromMail
private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromMailArchives.class);
public void createSequenceFiles(MailOptions options) throws IOException {
- ChunkedWriter writer = new ChunkedWriter(new Configuration(), options.chunkSize, new Path(options.outputDir));
- MailProcessor processor = new MailProcessor(options, options.prefix, writer);
+ ChunkedWriter writer = new ChunkedWriter(new Configuration(), options.getChunkSize(), new Path(options.getOutputDir()));
+ MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
try {
- if (options.input.isDirectory()) {
+ if (options.getInput().isDirectory()) {
PrefixAdditionFilter filter = new PrefixAdditionFilter(processor, writer);
- options.input.listFiles(filter);
- log.info("Parsed " + filter.getMessageCount() + " messages from " + options.input.getAbsolutePath());
+ options.getInput().listFiles(filter);
+ log.info("Parsed {} messages from {}", filter.getMessageCount(), options.getInput().getAbsolutePath());
} else {
long start = System.currentTimeMillis();
- long cnt = processor.parseMboxLineByLine(options.input);
+ long cnt = processor.parseMboxLineByLine(options.getInput());
long finish = System.currentTimeMillis();
- log.info("Parsed " + cnt + " messages from " + options.input.getAbsolutePath() + " in time: " + (finish - start));
+ log.info("Parsed {} messages from {} in time: {}",
+ new Object[] { cnt, options.getInput().getAbsolutePath(), (finish - start) });
}
} finally {
Closeables.closeQuietly(writer);
@@ -76,8 +78,8 @@ public final class SequenceFilesFromMail
}
public class PrefixAdditionFilter implements FileFilter {
- private MailProcessor processor;
- private ChunkedWriter writer;
+ private final MailProcessor processor;
+ private final ChunkedWriter writer;
private long messageCount;
public PrefixAdditionFilter(MailProcessor processor, ChunkedWriter writer) {
@@ -93,13 +95,12 @@ public final class SequenceFilesFromMail
@Override
public boolean accept(File current) {
if (current.isDirectory()) {
- log.info("At " + current.getAbsolutePath());
- PrefixAdditionFilter nested =
- new PrefixAdditionFilter(new MailProcessor(processor.getOptions(), processor.getPrefix() + File.separator + current.getName(), writer),
- writer);
+ log.info("At {}", current.getAbsolutePath());
+ PrefixAdditionFilter nested = new PrefixAdditionFilter(new MailProcessor(
+ processor.getOptions(), processor.getPrefix() + File.separator + current.getName(), writer), writer);
current.listFiles(nested);
long dirCount = nested.getMessageCount();
- log.info("Parsed " + dirCount + " messages from directory " + current.getAbsolutePath());
+ log.info("Parsed {} messages from directory {}", dirCount, current.getAbsolutePath());
messageCount += dirCount;
} else {
try {
@@ -117,13 +118,9 @@ public final class SequenceFilesFromMail
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The input file/dir containing the documents").withShortName("i").create();
-
- Option outputDirOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The output directory").withShortName("o").create();
+ Option inputOpt = DefaultOptionCreator.inputOption().create();
+
+ Option outputDirOpt = DefaultOptionCreator.outputOption().create();
Option chunkSizeOpt = obuilder.withLongName("chunkSize").withArgument(
abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()).withDescription(
@@ -152,8 +149,7 @@ public final class SequenceFilesFromMail
Option bodySeparatorOpt = obuilder.withLongName("bodySeparator").withRequired(false).withArgument(
abuilder.withName("bodySeparator").withMinimum(1).withMaximum(1).create()).
withDescription("The separator to use between lines in the body. Default is \\n. Useful to change if you wish to have the message be on one line").withShortName("bodySep").create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(keyPrefixOpt).withOption(chunkSizeOpt).withOption(
charsetOpt).withOption(outputDirOpt).withOption(helpOpt).withOption(inputOpt).withOption(subjectOpt).withOption(toOpt)
@@ -185,16 +181,17 @@ public final class SequenceFilesFromMail
Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
SequenceFilesFromMailArchives dir = new SequenceFilesFromMailArchives();
MailOptions options = new MailOptions();
- options.input = input;
- options.outputDir = outputDir;
- options.prefix = prefix;
- options.chunkSize = chunkSize;
- options.charset = charset;
+ options.setInput(input);
+ options.setOutputDir(outputDir);
+ options.setPrefix(prefix);
+ options.setChunkSize(chunkSize);
+ options.setCharset(charset);
List<Pattern> patterns = new ArrayList<Pattern>(5);
- //patternOrder is used downstream so that we can know what order the text is in instead of encoding it in the string, which
- //would require more processing later to remove it pre feature selection.
+ // patternOrder is used downstream so that we can know what order the text is in instead
+ // of encoding it in the string, which
+ // would require more processing later to remove it pre feature selection.
Map<String, Integer> patternOrder = new HashMap<String, Integer>();
int order = 0;
if (cmdLine.hasOption(fromOpt)) {
@@ -213,22 +210,20 @@ public final class SequenceFilesFromMail
patterns.add(MailProcessor.SUBJECT_PREFIX);
patternOrder.put(MailOptions.SUBJECT, order++);
}
- options.patternsToMatch = patterns.toArray(new Pattern[patterns.size()]);
- options.patternOrder = patternOrder;
- options.includeBody = cmdLine.hasOption(bodyOpt);
- options.separator = "\n";
+ options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
+ options.setPatternOrder(patternOrder);
+ options.setIncludeBody(cmdLine.hasOption(bodyOpt));
+ options.setSeparator("\n");
if (cmdLine.hasOption(separatorOpt)) {
- options.separator = cmdLine.getValue(separatorOpt).toString();
+ options.setSeparator(cmdLine.getValue(separatorOpt).toString());
}
if (cmdLine.hasOption(bodySeparatorOpt)) {
- options.bodySeparator = cmdLine.getValue(bodySeparatorOpt).toString();
+ options.setBodySeparator(cmdLine.getValue(bodySeparatorOpt).toString());
}
long start = System.currentTimeMillis();
dir.createSequenceFiles(options);
long finish = System.currentTimeMillis();
- if (log.isInfoEnabled()) {
- log.info("Conversion took " + (finish - start) + " ms");
- }
+ log.info("Conversion took {}ms", finish - start);
} catch (OptionException e) {
log.error("Exception", e);
CommandLineUtil.printHelp(group);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Sat Oct 15 15:35:50 2011
@@ -52,6 +52,7 @@ import org.apache.lucene.store.FSDirecto
import org.apache.lucene.util.OpenBitSet;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.stats.LogLikelihood;
@@ -320,7 +321,7 @@ public class ClusterLabels {
Option maxLabelsOpt = obuilder.withLongName("maxLabels").withRequired(false).withArgument(
abuilder.withName("maxLabels").withMinimum(1).withMaximum(1).create()).withDescription(
"The maximum number of labels to print per cluster").withShortName("x").create();
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+ Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(indexOpt).withOption(idFieldOpt).withOption(outputOpt)
.withOption(fieldOpt).withOption(seqOpt).withOption(pointsOpt).withOption(helpOpt)
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=1183661&r1=1183660&r2=1183661&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Sat Oct 15 15:35:50 2011
@@ -144,7 +144,7 @@ public final class Driver {
Option idFieldOpt = obuilder.withLongName("idField").withRequired(false).withArgument(
abuilder.withName("idField").withMinimum(1).withMaximum(1).create()).withDescription(
"The field in the index containing the index. If null, then the Lucene internal doc "
- + "id is used which is prone to error if the underlying index changes").withShortName("i").create();
+ + "id is used which is prone to error if the underlying index changes").create();
Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true).withArgument(
abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()).withDescription(