You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2012/02/13 16:14:19 UTC

svn commit: r1243556 [1/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ core/src/main/java/org...

Author: gsingers
Date: Mon Feb 13 15:14:18 2012
New Revision: 1243556

URL: http://svn.apache.org/viewvc?rev=1243556&view=rev
Log:
MAHOUT-947: add new inputs to seq dumper, refactor to common CLI input

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalk.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalkWithRestartJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java Mon Feb 13 15:14:18 2012
@@ -32,6 +32,7 @@ import org.apache.mahout.common.Abstract
 import org.apache.mahout.common.RandomUtils;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
@@ -73,10 +74,10 @@ public class DatasetSplitter extends Abs
     addOption("probePercentage", "p", "percentage of the data to use as probe set (default: " +
         DEFAULT_PROBE_PERCENTAGE + ')', String.valueOf(DEFAULT_PROBE_PERCENTAGE));
 
-    Map<String, String> parsedArgs = parseArguments(args);
-    double trainingPercentage = Double.parseDouble(parsedArgs.get("--trainingPercentage"));
-    double probePercentage = Double.parseDouble(parsedArgs.get("--probePercentage"));
-    String tempDir = parsedArgs.get("--tempDir");
+    Map<String, List<String>> parsedArgs = parseArguments(args);
+    double trainingPercentage = Double.parseDouble(getOption("trainingPercentage"));
+    double probePercentage = Double.parseDouble(getOption("probePercentage"));
+    String tempDir = getOption("tempDir");
 
     Path markedPrefs = new Path(tempDir, "markedPreferences");
     Path trainingSetPath = new Path(getOutputPath(), "trainingSet");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java Mon Feb 13 15:14:18 2012
@@ -44,6 +44,7 @@ import org.apache.mahout.math.map.OpenIn
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -75,7 +76,7 @@ public class FactorizationEvaluator exte
     addOption("itemFeatures", null, "path to the item feature matrix", true);
     addOutputOption();
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
@@ -84,10 +85,11 @@ public class FactorizationEvaluator exte
 
     Job predictRatings = prepareJob(getInputPath(), errors, TextInputFormat.class, PredictRatingsMapper.class,
         DoubleWritable.class, NullWritable.class, SequenceFileOutputFormat.class);
-    predictRatings.getConfiguration().set(USER_FEATURES_PATH, parsedArgs.get("--userFeatures"));
-    predictRatings.getConfiguration().set(ITEM_FEATURES_PATH, parsedArgs.get("--itemFeatures"));
+
+    predictRatings.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
+    predictRatings.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
     boolean succeeded = predictRatings.waitForCompletion(true);
-    if (!succeeded) 
+    if (!succeeded)
       return -1;
 
     BufferedWriter writer  = null;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Mon Feb 13 15:14:18 2012
@@ -109,16 +109,16 @@ public class ParallelALSFactorizationJob
     addOption("numFeatures", null, "dimension of the feature space", true);
     addOption("numIterations", null, "number of iterations", true);
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
-    numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
-    lambda = Double.parseDouble(parsedArgs.get("--lambda"));
-    alpha = Double.parseDouble(parsedArgs.get("--alpha"));
-    implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
+    numFeatures = Integer.parseInt(getOption("numFeatures"));
+    numIterations = Integer.parseInt(getOption("numIterations"));
+    lambda = Double.parseDouble(getOption("lambda"));
+    alpha = Double.parseDouble(getOption("alpha"));
+    implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback"));
 
     /*
         * compute the factorization A = U M'

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Mon Feb 13 15:14:18 2012
@@ -80,7 +80,7 @@ public class RecommenderJob extends Abst
     addOption("maxRating", null, "maximum rating available", true);
     addOutputOption();
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
@@ -88,14 +88,15 @@ public class RecommenderJob extends Abst
     Job prediction = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, PredictionMapper.class,
         IntWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
     prediction.getConfiguration().setInt(NUM_RECOMMENDATIONS,
-        Integer.parseInt(parsedArgs.get("--numRecommendations")));
-    prediction.getConfiguration().set(USER_FEATURES_PATH, parsedArgs.get("--userFeatures"));
-    prediction.getConfiguration().set(ITEM_FEATURES_PATH, parsedArgs.get("--itemFeatures"));
-    prediction.getConfiguration().set(MAX_RATING, parsedArgs.get("--maxRating"));
+        Integer.parseInt(getOption("numRecommendations")));
+    prediction.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
+    prediction.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
+    prediction.getConfiguration().set(MAX_RATING, getOption("maxRating"));
     boolean succeeded = prediction.waitForCompletion(true);
-    if (!succeeded) 
+    if (!succeeded)
       return -1;
 
+
     return 0;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Mon Feb 13 15:14:18 2012
@@ -38,6 +38,7 @@ import org.apache.mahout.math.VarLongWri
 import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
 import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
 
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Matcher;
@@ -118,24 +119,24 @@ public final class RecommenderJob extend
             "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')', true);
     addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
 
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
     Path outputPath = getOutputPath();
-    int numRecommendations = Integer.parseInt(parsedArgs.get("--numRecommendations"));
-    String usersFile = parsedArgs.get("--usersFile");
-    String itemsFile = parsedArgs.get("--itemsFile");
-    String filterFile = parsedArgs.get("--filterFile");
-    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
-    int maxPrefsPerUser = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
-    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
-    int maxPrefsPerUserInItemSimilarity = Integer.parseInt(parsedArgs.get("--maxPrefsPerUserInItemSimilarity"));
-    int maxSimilaritiesPerItem = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
-    String similarityClassname = parsedArgs.get("--similarityClassname");
-    double threshold = parsedArgs.containsKey("--threshold") ?
-            Double.parseDouble(parsedArgs.get("--threshold")) : RowSimilarityJob.NO_THRESHOLD;
+    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));
+    String usersFile = getOption("usersFile");
+    String itemsFile = getOption("itemsFile");
+    String filterFile = getOption("filterFile");
+    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
+    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
+    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
+    int maxPrefsPerUserInItemSimilarity = Integer.parseInt(getOption("maxPrefsPerUserInItemSimilarity"));
+    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
+    String similarityClassname = getOption("similarityClassname");
+    double threshold = parsedArgs.containsKey("threshold") ?
+            Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD;
 
 
     Path prepPath = getTempPath("preparePreferenceMatrix");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java Mon Feb 13 15:14:18 2012
@@ -35,6 +35,7 @@ import org.apache.mahout.math.VarIntWrit
 import org.apache.mahout.math.VarLongWritable;
 import org.apache.mahout.math.VectorWritable;
 
+import java.util.List;
 import java.util.Map;
 
 public class PreparePreferenceMatrixJob extends AbstractJob {
@@ -62,14 +63,14 @@ public class PreparePreferenceMatrixJob 
     addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
     addOption("ratingShift", "rs", "shift ratings by this value", "0.0");
 
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
-    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
-    float ratingShift = Float.parseFloat(parsedArgs.get("--ratingShift"));
+    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
+    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
+    float ratingShift = Float.parseFloat(getOption("ratingShift"));
     //convert items to an internal index
     Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class,
             ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class,
@@ -100,8 +101,8 @@ public class PreparePreferenceMatrixJob 
     toItemVectors.setCombinerClass(ToItemVectorsReducer.class);
 
     /* configure sampling regarding the uservectors */
-    if (parsedArgs.containsKey("--maxPrefsPerUser")) {
-      int samplingSize = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
+    if (hasOption("maxPrefsPerUser")) {
+      int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser"));
       toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize);
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Mon Feb 13 15:14:18 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.hadoop.pseudo;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -111,17 +112,17 @@ public final class RecommenderJob extend
     addOption("numRecommendations", "n", "Number of recommendations per user", "10");
     addOption("usersFile", "u", "File of users to recommend for", null);
     
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
     Path inputFile = getInputPath();
     Path outputPath = getOutputPath();
-    Path usersFile = parsedArgs.get("--usersFile") == null ? inputFile : new Path(parsedArgs.get("--usersFile"));
+    Path usersFile = hasOption("usersFile") ? inputFile : new Path(getOption("usersFile"));
     
-    String recommendClassName = parsedArgs.get("--recommenderClassName");
-    int recommendationsPerUser = Integer.parseInt(parsedArgs.get("--numRecommendations"));
+    String recommendClassName = getOption("recommenderClassName");
+    int recommendationsPerUser = Integer.parseInt(getOption("numRecommendations"));
     
     Job job = prepareJob(usersFile,
                          outputPath,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Feb 13 15:14:18 2012
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -108,19 +109,19 @@ public final class ItemSimilarityJob ext
     addOption("booleanData", "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE));
     addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    String similarityClassName = parsedArgs.get("--similarityClassname");
-    int maxSimilarItemsPerItem = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
-    int maxPrefsPerUser = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
-    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
-    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
+    String similarityClassName = getOption("similarityClassname");
+    int maxSimilarItemsPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
+    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
+    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
+    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
 
-    double threshold = parsedArgs.containsKey("--threshold") ?
-        Double.parseDouble(parsedArgs.get("--threshold")) : RowSimilarityJob.NO_THRESHOLD;
+    double threshold = hasOption("threshold") ?
+        Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD;
 
     Path similarityMatrixPath = getTempPath("similarityMatrix");
     Path prepPath = getTempPath("prepareRatingMatrix");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Mon Feb 13 15:14:18 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.hadoop.slopeone;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -46,14 +47,14 @@ public final class SlopeOneAverageDiffsJ
     addInputOption();
     addOutputOption();
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
     
     Path prefsFile = getInputPath();
     Path outputPath = getOutputPath();
-    Path averagesOutputPath = new Path(parsedArgs.get("--tempDir"));
+    Path averagesOutputPath = new Path(getOption("--tempDir"));
 
     AtomicInteger currentPhase = new AtomicInteger();
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Mon Feb 13 15:14:18 2012
@@ -39,6 +39,7 @@ import org.apache.mahout.math.VectorWrit
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -64,27 +65,27 @@ public class TestNaiveBayesDriver extend
     addOption("model", "m", "The path to the model built during training", true);
     addOption(buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
     addOption("labelIndex", "l", "The path to the location of the label index", true);
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.delete(getConf(), getOutputPath());
     }
-    Path model = new Path(parsedArgs.get("--model"));
+    Path model = new Path(getOption("model"));
     HadoopUtil.cacheFiles(model, getConf());
     //the output key is the expected value, the output value are the scores for all the labels
     Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class,
             Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
-    //testJob.getConfiguration().set(LABEL_KEY, parsedArgs.get("--labels"));
-    boolean complementary = parsedArgs.containsKey("--testComplementary");
+    //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels"));
+    boolean complementary = parsedArgs.containsKey("testComplementary");
     testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary));
     boolean succeeded = testJob.waitForCompletion(true);
     if (!succeeded) {
       return -1;
     }
     //load the labels
-    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(parsedArgs.get("--labelIndex")));
+    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));
 
     //loop over the results and create the confusion matrix
     SequenceFileDirIterable<Text, VectorWritable> dirIterable =

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Mon Feb 13 15:14:18 2012
@@ -38,6 +38,7 @@ import org.apache.mahout.common.mapreduc
 import org.apache.mahout.math.VectorWritable;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -69,7 +70,7 @@ public final class TrainNaiveBayesJob ex
     addOption(buildOption("trainComplementary", "c", "train complementary?", false, false, String.valueOf(false)));
     addOption("labelIndex", "li", "The path to store the label index in", false);
     addOption(DefaultOptionCreator.overwriteOption().create());
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
@@ -78,15 +79,15 @@ public final class TrainNaiveBayesJob ex
       HadoopUtil.delete(getConf(), getTempPath());
     }
     Path labPath;
-    String labPathStr = parsedArgs.get("--labelIndex");
+    String labPathStr = getOption("labelIndex");
     if (labPathStr != null) {
       labPath = new Path(labPathStr);
     } else {
       labPath = getTempPath("labelIndex");
     }
-    long labelSize = createLabelIndex(parsedArgs, labPath);
-    float alphaI = Float.parseFloat(parsedArgs.get("--alphaI"));
-    boolean trainComplementary = Boolean.parseBoolean(parsedArgs.get("--trainComplementary"));
+    long labelSize = createLabelIndex(labPath);
+    float alphaI = Float.parseFloat(getOption("alphaI"));
+    boolean trainComplementary = Boolean.parseBoolean(getOption("trainComplementary"));
 
 
     HadoopUtil.setSerializations(getConf());
@@ -132,12 +133,12 @@ public final class TrainNaiveBayesJob ex
     return 0;
   }
 
-  private long createLabelIndex(Map<String, String> parsedArgs, Path labPath) throws IOException {
+  private long createLabelIndex(Path labPath) throws IOException {
     long labelSize = 0;
-    if (parsedArgs.containsKey("--labels")) {
-      Iterable<String> labels = Splitter.on(",").split(parsedArgs.get("--labels"));
+    if (hasOption("labels")) {
+      Iterable<String> labels = Splitter.on(",").split(getOption("labels"));
       labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
-    } else if (parsedArgs.containsKey("--extractLabels")) {
+    } else if (hasOption("extractLabels")) {
       SequenceFileDirIterable<Text, IntWritable> iterable =
               new SequenceFileDirIterable<Text, IntWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
       labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Mon Feb 13 15:14:18 2012
@@ -64,22 +64,22 @@ public class EigencutsDriver extends Abs
     addOption(DefaultOptionCreator.inputOption().create());
     addOption(DefaultOptionCreator.outputOption().create());
     addOption(DefaultOptionCreator.overwriteOption().create());
-    Map<String, String> parsedArgs = parseArguments(arg0);
+    Map<String, List<String>> parsedArgs = parseArguments(arg0);
     if (parsedArgs == null) {
       return 0;
     }
 
     // read in the command line values
-    Path input = new Path(parsedArgs.get("--input"));
-    Path output = new Path(parsedArgs.get("--output"));
+    Path input = getInputPath();
+    Path output = getOutputPath();
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.delete(getConf(), output);
     }
-    int dimensions = Integer.parseInt(parsedArgs.get("--dimensions"));
-    double halflife = Double.parseDouble(parsedArgs.get("--half-life"));
-    double epsilon = Double.parseDouble(parsedArgs.get("--epsilon"));
-    double tau = Double.parseDouble(parsedArgs.get("--tau"));
-    int eigenrank = Integer.parseInt(parsedArgs.get("--eigenrank"));
+    int dimensions = Integer.parseInt(getOption("dimensions"));
+    double halflife = Double.parseDouble(getOption("half-life"));
+    double epsilon = Double.parseDouble(getOption("epsilon"));
+    double tau = Double.parseDouble(getOption("tau"));
+    int eigenrank = Integer.parseInt(getOption("eigenrank"));
 
     run(getConf(), input, output, eigenrank, dimensions, halflife, epsilon, tau);
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Mon Feb 13 15:14:18 2012
@@ -39,6 +39,7 @@ import org.apache.mahout.math.hadoop.dec
 import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -65,7 +66,7 @@ public class SpectralKMeansDriver extend
     addOption(DefaultOptionCreator.convergenceOption().create());
     addOption(DefaultOptionCreator.maxIterationsOption().create());
     addOption(DefaultOptionCreator.overwriteOption().create());
-    Map<String, String> parsedArgs = parseArguments(arg0);
+    Map<String, List<String>> parsedArgs = parseArguments(arg0);
     if (parsedArgs == null) {
       return 0;
     }
@@ -75,8 +76,8 @@ public class SpectralKMeansDriver extend
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.delete(conf, output);
     }
-    int numDims = Integer.parseInt(parsedArgs.get("--dimensions"));
-    int clusters = Integer.parseInt(parsedArgs.get("--clusters"));
+    int numDims = Integer.parseInt(getOption("dimensions"));
+    int clusters = Integer.parseInt(getOption("clusters"));
     String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
     DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
     double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon Feb 13 15:14:18 2012
@@ -17,7 +17,10 @@
 
 package org.apache.mahout.common;
 
+import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -92,14 +95,16 @@ public abstract class AbstractJob extend
 
   /** input path, populated by {@link #parseArguments(String[])} */
   private Path inputPath;
+  private File inputFile;//the input represented as a file
 
   /** output path, populated by {@link #parseArguments(String[]) */
   private Path outputPath;
+  private File outputFile;//the output represented as a file
 
   /** temp path, populated by {@link #parseArguments(String[]) */
   private Path tempPath;
 
-  private Map<String, String> argMap;
+  private Map<String, List<String>> argMap;
 
   /** internal list of options that have been added */
   private final List<Option> options;
@@ -131,6 +136,14 @@ public abstract class AbstractJob extend
     return new Path(outputPath, path);
   }
 
+  protected File getInputFile(){
+    return inputFile;
+  }
+
+  protected File getOutputFile(){
+    return outputFile;
+  }
+
 
   protected Path getTempPath() {
     return tempPath;
@@ -217,6 +230,8 @@ public abstract class AbstractJob extend
     this.outputOption = addOption(DefaultOptionCreator.outputOption().create());
   }
 
+
+
   /** Build an option with the given parameters. Name and description are
    *  required.
    * 
@@ -235,6 +250,16 @@ public abstract class AbstractJob extend
                                     boolean required,
                                     String defaultValue) {
 
+    return buildOption(name, shortName, description, hasArg, 1, 1, required, defaultValue);
+  }
+
+  protected static Option buildOption(String name,
+                                    String shortName,
+                                    String description,
+                                    boolean hasArg, int min, int max,
+                                    boolean required,
+                                    String defaultValue) {
+
     DefaultOptionBuilder optBuilder = new DefaultOptionBuilder().withLongName(name).withDescription(description)
         .withRequired(required);
 
@@ -243,7 +268,7 @@ public abstract class AbstractJob extend
     }
 
     if (hasArg) {
-      ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(1).withMaximum(1);
+      ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(min).withMaximum(max);
 
       if (defaultValue != null) {
         argBuilder = argBuilder.withDefault(defaultValue);
@@ -285,7 +310,7 @@ public abstract class AbstractJob extend
    *
    *
    */
-  public Map<String, String> parseArguments(String[] args) throws IOException {
+  public Map<String, List<String>> parseArguments(String[] args) throws IOException {
 
     Option helpOpt = addOption(DefaultOptionCreator.helpOption());
     addOption("tempDir", null, "Intermediate output directory", "temp");
@@ -326,12 +351,14 @@ public abstract class AbstractJob extend
       return null;
     }
 
-    argMap = new TreeMap<String, String>();
+    argMap = new TreeMap<String, List<String>>();
     maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));
 
-    this.tempPath = new Path(argMap.get("--tempDir"));
+    this.tempPath = new Path(getOption("tempDir"));
 
-    log.info("Command line arguments: {}", argMap);
+    if (!hasOption("quiet")){
+      log.info("Command line arguments: {}", argMap);
+    }
     return argMap;
   }
   
@@ -346,7 +373,11 @@ public abstract class AbstractJob extend
    * @return the requested option, or null if it has not been specified
    */
   public String getOption(String optionName) {
-    return argMap.get(keyFor(optionName));
+    List<String> list = argMap.get(keyFor(optionName));
+    if (list != null && list.isEmpty() == false){
+      return list.get(0);
+    }
+    return null;
   }
 
   /**
@@ -364,6 +395,15 @@ public abstract class AbstractJob extend
   }
 
   /**
+   * Options can occur multiple times, so return the list
+   * @param optionName The unadorned (no "--" prefixing it) option name
+   * @return The values, else null.  An option that is present but has no values (e.g. a simple flag) also yields null, because maybePut stores null rather than an empty list for it
+   */
+  public List<String> getOptions(String optionName){
+    return argMap.get(keyFor(optionName));
+  }
+
+  /**
    * @return if the requested option has been specified
    */
   public boolean hasOption(String optionName) {
@@ -390,6 +430,7 @@ public abstract class AbstractJob extend
 
     if (inputOption != null && cmdLine.hasOption(inputOption)) {
       this.inputPath = new Path(cmdLine.getValue(inputOption).toString());
+      this.inputFile = new File(cmdLine.getValue(inputOption).toString());
     }
     if (inputPath == null && conf.get("mapred.input.dir") != null) {
       this.inputPath = new Path(conf.get("mapred.input.dir"));
@@ -397,6 +438,7 @@ public abstract class AbstractJob extend
 
     if (outputOption != null && cmdLine.hasOption(outputOption)) {
       this.outputPath = new Path(cmdLine.getValue(outputOption).toString());
+      this.outputFile = new File(cmdLine.getValue(outputOption).toString());
     }
     if (outputPath == null && conf.get("mapred.output.dir") != null) {
       this.outputPath = new Path(conf.get("mapred.output.dir"));
@@ -408,25 +450,47 @@ public abstract class AbstractJob extend
         "No output specified:  or -Dmapred.output.dir must be provided to specify output directory");
   }
 
-  protected static void maybePut(Map<String, String> args, CommandLine cmdLine, Option... opt) {
+  protected static void maybePut(Map<String, List<String>> args, CommandLine cmdLine, Option... opt) {
     for (Option o : opt) {
 
       // the option appeared on the command-line, or it has a value
       // (which is likely a default value). 
-      if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null) {
+      if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null || (cmdLine.getValues(o) != null && cmdLine.getValues(o).isEmpty() == false)) {
 
         // nulls are ok, for cases where options are simple flags.
-        Object vo = cmdLine.getValue(o);
-        String value = vo == null ? null : vo.toString();
-        args.put(o.getPreferredName(), value);
+        List vo = cmdLine.getValues(o);
+        if (vo != null && vo.isEmpty() == false){
+          List<String> vals = new ArrayList<String>();
+          for (Object o1 : vo) {
+            vals.add(o1.toString());
+          }
+          args.put(o.getPreferredName(), vals);
+        } else {
+          args.put(o.getPreferredName(), null);
+        }
       }
     }
   }
 
-  protected static boolean shouldRunNextPhase(Map<String, String> args, AtomicInteger currentPhase) {
+  /**
+   * Looks up an option in a parsed-argument map and returns its first value.
+   * @param args The input argument map
+   * @param optName The adorned (including "--") option name
+   * @return The first value in the match, else null
+   */
+  public static String getOption(Map<String, List<String>> args, String optName){
+    List<String> res = args.get(optName);
+    if (res != null && res.isEmpty() == false){
+      return res.get(0);
+    }
+    return null;
+  }
+
+
+  protected static boolean shouldRunNextPhase(Map<String, List<String>> args, AtomicInteger currentPhase) {
     int phase = currentPhase.getAndIncrement();
-    String startPhase = args.get("--startPhase");
-    String endPhase = args.get("--endPhase");
+    String startPhase = getOption(args, "--startPhase");
+    String endPhase = getOption(args, "--endPhase");
     boolean phaseSkipped = (startPhase != null && phase < Integer.parseInt(startPhase))
         || (endPhase != null && phase > Integer.parseInt(endPhase));
     if (phaseSkipped) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java Mon Feb 13 15:14:18 2012
@@ -47,6 +47,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -94,14 +95,14 @@ public class AdjacencyMatrixJob extends 
 
     addOutputOption();
 
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    Path vertices = new Path(parsedArgs.get("--vertices"));
-    Path edges = new Path(parsedArgs.get("--edges"));
-    boolean symmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
+    Path vertices = new Path(getOption("vertices"));
+    Path edges = new Path(getOption("edges"));
+    boolean symmetric = Boolean.parseBoolean(getOption("symmetric"));
 
     log.info("Indexing vertices sequentially, this might take a while...");
     int numVertices = indexVertices(vertices, getOutputPath(VERTEX_INDEX));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalk.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalk.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalk.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalk.java Mon Feb 13 15:14:18 2012
@@ -46,6 +46,7 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 abstract class RandomWalk extends AbstractJob {
@@ -58,7 +59,7 @@ abstract class RandomWalk extends Abstra
   protected abstract Vector createDampingVector(int numVertices, double stayingProbability);
 
   protected void addSpecificOptions() {}
-  protected void evaluateSpecificOptions(Map<String, String> parsedArgs) {}
+  protected void evaluateSpecificOptions() {}
 
   @Override
   public final int run(String[] args) throws Exception {
@@ -70,15 +71,15 @@ abstract class RandomWalk extends Abstra
 
     addSpecificOptions();
 
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    evaluateSpecificOptions(parsedArgs);
+    evaluateSpecificOptions();
 
-    int numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
-    double stayingProbability = Double.parseDouble(parsedArgs.get("--stayingProbability"));
+    int numIterations = Integer.parseInt(getOption("numIterations"));
+    double stayingProbability = Double.parseDouble(getOption("stayingProbability"));
 
     Preconditions.checkArgument(numIterations > 0);
     Preconditions.checkArgument(stayingProbability > 0.0 && stayingProbability <= 1.0);
@@ -89,8 +90,8 @@ abstract class RandomWalk extends Abstra
     Path numVerticesPath = getTempPath(AdjacencyMatrixJob.NUM_VERTICES);
 
     /* create the adjacency matrix */
-    ToolRunner.run(getConf(), new AdjacencyMatrixJob(), new String[] { "--vertices", parsedArgs.get("--vertices"),
-        "--edges", parsedArgs.get("--edges"), "--output", getTempPath().toString() });
+    ToolRunner.run(getConf(), new AdjacencyMatrixJob(), new String[] { "--vertices", getOption("vertices"),
+        "--edges", getOption("edges"), "--output", getTempPath().toString() });
 
     int numVertices = HadoopUtil.readInt(numVerticesPath, getConf());
     Preconditions.checkArgument(numVertices > 0);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalkWithRestartJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalkWithRestartJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalkWithRestartJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/RandomWalkWithRestartJob.java Mon Feb 13 15:14:18 2012
@@ -21,6 +21,8 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
+
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -64,8 +66,8 @@ public class RandomWalkWithRestartJob ex
   }
 
   @Override
-  protected void evaluateSpecificOptions(Map<String, String> parsedArgs) {
-    sourceVertexIndex = Integer.parseInt(parsedArgs.get("--sourceVertexIndex"));
+  protected void evaluateSpecificOptions() {
+    sourceVertexIndex = Integer.parseInt(getOption("sourceVertexIndex"));
   }
 
 }
\ No newline at end of file

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java Mon Feb 13 15:14:18 2012
@@ -41,6 +41,7 @@ import org.apache.mahout.math.function.F
 
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 public class MatrixMultiplicationJob extends AbstractJob {
@@ -90,19 +91,19 @@ public class MatrixMultiplicationJob ext
     addOption("inputPathA", "ia", "Path to the first input matrix", true);
     addOption("inputPathB", "ib", "Path to the second input matrix", true);
 
-    Map<String, String> argMap = parseArguments(strings);
+    Map<String, List<String>> argMap = parseArguments(strings);
     if (argMap == null) {
       return -1;
     }
 
-    DistributedRowMatrix a = new DistributedRowMatrix(new Path(argMap.get("--inputPathA")),
-                                                      new Path(argMap.get("--tempDir")),
-                                                      Integer.parseInt(argMap.get("--numRowsA")),
-                                                      Integer.parseInt(argMap.get("--numColsA")));
-    DistributedRowMatrix b = new DistributedRowMatrix(new Path(argMap.get("--inputPathB")),
-                                                      new Path(argMap.get("--tempDir")),
-                                                      Integer.parseInt(argMap.get("--numRowsB")),
-                                                      Integer.parseInt(argMap.get("--numColsB")));
+    DistributedRowMatrix a = new DistributedRowMatrix(new Path(getOption("inputPathA")),
+                                                      new Path(getOption("tempDir")),
+                                                      Integer.parseInt(getOption("numRowsA")),
+                                                      Integer.parseInt(getOption("numColsA")));
+    DistributedRowMatrix b = new DistributedRowMatrix(new Path(getOption("inputPathB")),
+                                                      new Path(getOption("tempDir")),
+                                                      Integer.parseInt(getOption("numRowsB")),
+                                                      Integer.parseInt(getOption("numColsB")));
 
     a.setConf(new Configuration(getConf()));
     b.setConf(new Configuration(getConf()));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java Mon Feb 13 15:14:18 2012
@@ -41,6 +41,7 @@ import org.apache.mahout.math.VectorWrit
 
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -59,13 +60,13 @@ public class TransposeJob extends Abstra
     addInputOption();
     addOption("numRows", "nr", "Number of rows of the input matrix");
     addOption("numCols", "nc", "Number of columns of the input matrix");
-    Map<String, String> parsedArgs = parseArguments(strings);
+    Map<String, List<String>> parsedArgs = parseArguments(strings);
     if (parsedArgs == null) {
       return -1;
     }
 
-    int numRows = Integer.parseInt(parsedArgs.get("--numRows"));
-    int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
+    int numRows = Integer.parseInt(getOption("numRows"));
+    int numCols = Integer.parseInt(getOption("numCols"));
 
     DistributedRowMatrix matrix = new DistributedRowMatrix(getInputPath(), getTempPath(), numRows, numCols);
     matrix.setConf(new Configuration(getConf()));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Mon Feb 13 15:14:18 2012
@@ -40,6 +40,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 public class DistributedLanczosSolver extends LanczosSolver implements Tool {
@@ -50,7 +51,7 @@ public class DistributedLanczosSolver ex
 
   private Configuration conf;
 
-  private Map<String, String> parsedArgs;
+  private Map<String, List<String>> parsedArgs;
 
   /**
    * For the distributed case, the best guess at a useful initialization state for Lanczos we'll chose to be
@@ -93,21 +94,21 @@ public class DistributedLanczosSolver ex
 
   @Override
   public int run(String[] strings) throws Exception {
-    Path inputPath = new Path(parsedArgs.get("--input"));
-    Path outputPath = new Path(parsedArgs.get("--output"));
-    Path outputTmpPath = new Path(parsedArgs.get("--tempDir"));
-    Path workingDirPath = parsedArgs.get("--workingDir") != null
-                        ? new Path(parsedArgs.get("--workingDir")) : null;
-    int numRows = Integer.parseInt(parsedArgs.get("--numRows"));
-    int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
-    boolean isSymmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
-    int desiredRank = Integer.parseInt(parsedArgs.get("--rank"));
+    Path inputPath = new Path(AbstractJob.getOption(parsedArgs, "--input"));
+    Path outputPath = new Path(AbstractJob.getOption(parsedArgs, "--output"));
+    Path outputTmpPath = new Path(AbstractJob.getOption(parsedArgs, "--tempDir"));
+    Path workingDirPath = AbstractJob.getOption(parsedArgs, "--workingDir") != null
+                        ? new Path(AbstractJob.getOption(parsedArgs, "--workingDir")) : null;
+    int numRows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
+    int numCols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
+    boolean isSymmetric = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--symmetric"));
+    int desiredRank = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--rank"));
 
-    boolean cleansvd = Boolean.parseBoolean(parsedArgs.get("--cleansvd"));
+    boolean cleansvd = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--cleansvd"));
     if (cleansvd) {
-      double maxError = Double.parseDouble(parsedArgs.get("--maxError"));
-      double minEigenvalue = Double.parseDouble(parsedArgs.get("--minEigenvalue"));
-      boolean inMemory = Boolean.parseBoolean(parsedArgs.get("--inMemory"));
+      double maxError = Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"));
+      double minEigenvalue = Double.parseDouble(AbstractJob.getOption(parsedArgs, "--minEigenvalue"));
+      boolean inMemory = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--inMemory"));
       return run(inputPath,
                  outputPath,
                  outputTmpPath,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Mon Feb 13 15:14:18 2012
@@ -99,7 +99,7 @@ public class EigenVerificationJob extend
 
   @Override
   public int run(String[] args) throws Exception {
-    Map<String, String> argMap = handleArgs(args);
+    Map<String, List<String>> argMap = handleArgs(args);
     if (argMap == null) {
       return -1;
     }
@@ -108,13 +108,13 @@ public class EigenVerificationJob extend
     }
     // parse out the arguments
     runJob(getConf(),
-           new Path(argMap.get("--eigenInput")),
-           new Path(argMap.get("--corpusInput")),
+           new Path(getOption("eigenInput")),
+           new Path(getOption("corpusInput")),
            getOutputPath(),
-           argMap.get("--inMemory") != null,
-           Double.parseDouble(argMap.get("--maxError")),
-           //Double.parseDouble(argMap.get("--minEigenvalue")),
-           Integer.parseInt(argMap.get("--maxEigens")));
+           getOption("inMemory") != null,
+           Double.parseDouble(getOption("maxError")),
+           //Double.parseDouble(getOption("minEigenvalue")),
+           Integer.parseInt(getOption("maxEigens")));
     return 0;
   }
 
@@ -165,7 +165,7 @@ public class EigenVerificationJob extend
     return 0;
   }
 
-  private Map<String, String> handleArgs(String[] args) throws IOException {
+  private Map<String, List<String>> handleArgs(String[] args) throws IOException {
     addOutputOption();
     addOption("eigenInput",
               "ei",

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Mon Feb 13 15:14:18 2012
@@ -41,6 +41,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -84,13 +85,13 @@ public class RowSimilarityJob extends Ab
     addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?", String.valueOf(false));
     addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
 
-    int numberOfColumns = Integer.parseInt(parsedArgs.get("--numberOfColumns"));
-    String similarityClassnameArg = parsedArgs.get("--similarityClassname");
+    int numberOfColumns = Integer.parseInt(getOption("numberOfColumns"));
+    String similarityClassnameArg = getOption("similarityClassname");
     String similarityClassname;
     try {
       similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
@@ -98,10 +99,10 @@ public class RowSimilarityJob extends Ab
       similarityClassname = similarityClassnameArg;
     }
 
-    int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));
-    boolean excludeSelfSimilarity = Boolean.parseBoolean(parsedArgs.get("--excludeSelfSimilarity"));
-    double threshold = parsedArgs.containsKey("--threshold") ?
-        Double.parseDouble(parsedArgs.get("--threshold")) : NO_THRESHOLD;
+    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
+    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
+    double threshold = hasOption("threshold") ?
+        Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;
 
     Path weightsPath = getTempPath("weights");
     Path normsPath = getTempPath("norms.bin");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java Mon Feb 13 15:14:18 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.math.hadoop.solver;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -41,7 +42,7 @@ import org.apache.mahout.math.solver.Pre
 public class DistributedConjugateGradientSolver extends ConjugateGradientSolver implements Tool {
 
   private Configuration conf; 
-  private Map<String, String> parsedArgs;
+  private Map<String, List<String>> parsedArgs;
 
   /**
    * 
@@ -85,15 +86,15 @@ public class DistributedConjugateGradien
 
   @Override
   public int run(String[] strings) throws Exception {
-    Path inputPath = new Path(parsedArgs.get("--input"));
-    Path outputPath = new Path(parsedArgs.get("--output"));
-    Path tempPath = new Path(parsedArgs.get("--tempDir"));
-    Path vectorPath = new Path(parsedArgs.get("--vector"));
-    int numRows = Integer.parseInt(parsedArgs.get("--numRows"));
-    int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
-    int maxIterations = parsedArgs.containsKey("--maxIter") ? Integer.parseInt(parsedArgs.get("--maxIter")) : numCols;
+    Path inputPath = new Path(AbstractJob.getOption(parsedArgs, "--input"));
+    Path outputPath = new Path(AbstractJob.getOption(parsedArgs, "--output"));
+    Path tempPath = new Path(AbstractJob.getOption(parsedArgs, "--tempDir"));
+    Path vectorPath = new Path(AbstractJob.getOption(parsedArgs, "--vector"));
+    int numRows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
+    int numCols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
+    int maxIterations = parsedArgs.containsKey("--maxIter") ? Integer.parseInt(AbstractJob.getOption(parsedArgs, "--maxIter")) : numCols;
     double maxError = parsedArgs.containsKey("--maxError") 
-        ? Double.parseDouble(parsedArgs.get("--maxError")) 
+        ? Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"))
         : ConjugateGradientSolver.DEFAULT_MAX_ERROR;
 
     Vector b = loadInputVector(vectorPath);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java Mon Feb 13 15:14:18 2012
@@ -18,6 +18,7 @@ package org.apache.mahout.math.hadoop.st
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Map;
 
 import com.google.common.io.Closeables;
@@ -82,24 +83,24 @@ public class SSVDCli extends AbstractJob
               String.valueOf(true));
     addOption(DefaultOptionCreator.overwriteOption().create());
 
-    Map<String, String> pargs = parseArguments(args);
+    Map<String, List<String>> pargs = parseArguments(args);
     if (pargs == null) {
       return -1;
     }
 
-    int k = Integer.parseInt(pargs.get("--rank"));
-    int p = Integer.parseInt(pargs.get("--oversampling"));
-    int r = Integer.parseInt(pargs.get("--blockHeight"));
-    int h = Integer.parseInt(pargs.get("--outerProdBlockHeight"));
-    int abh = Integer.parseInt(pargs.get("--abtBlockHeight"));
-    int q = Integer.parseInt(pargs.get("--powerIter"));
-    int minSplitSize = Integer.parseInt(pargs.get("--minSplitSize"));
-    boolean computeU = Boolean.parseBoolean(pargs.get("--computeU"));
-    boolean computeV = Boolean.parseBoolean(pargs.get("--computeV"));
-    boolean cUHalfSigma = Boolean.parseBoolean(pargs.get("--uHalfSigma"));
-    boolean cVHalfSigma = Boolean.parseBoolean(pargs.get("--vHalfSigma"));
-    int reduceTasks = Integer.parseInt(pargs.get("--reduceTasks"));
-    boolean broadcast = Boolean.parseBoolean(pargs.get("--broadcast"));
+    int k = Integer.parseInt(getOption("rank"));
+    int p = Integer.parseInt(getOption("oversampling"));
+    int r = Integer.parseInt(getOption("blockHeight"));
+    int h = Integer.parseInt(getOption("outerProdBlockHeight"));
+    int abh = Integer.parseInt(getOption("abtBlockHeight"));
+    int q = Integer.parseInt(getOption("powerIter"));
+    int minSplitSize = Integer.parseInt(getOption("minSplitSize"));
+    boolean computeU = Boolean.parseBoolean(getOption("computeU"));
+    boolean computeV = Boolean.parseBoolean(getOption("computeV"));
+    boolean cUHalfSigma = Boolean.parseBoolean(getOption("uHalfSigma"));
+    boolean cVHalfSigma = Boolean.parseBoolean(getOption("vHalfSigma"));
+    int reduceTasks = Integer.parseInt(getOption("reduceTasks"));
+    boolean broadcast = Boolean.parseBoolean(getOption("broadcast"));
     boolean overwrite =
       pargs.containsKey(keyFor(DefaultOptionCreator.OVERWRITE_OPTION));
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java Mon Feb 13 15:14:18 2012
@@ -31,6 +31,7 @@ import org.apache.mahout.common.Abstract
 import org.apache.mahout.math.VarIntWritable;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -88,8 +89,11 @@ public final class Entropy extends Abstr
     addOption("source", "s", "Sets, if the entropy is calculated for the keys or the values. Can be <key> or <value>"
         , "key");
 
-    Map<String, String> arguments = parseArguments(args);
-    source = arguments.get("--source");
+    Map<String, List<String>> arguments = parseArguments(args);
+    if (arguments == null){
+      return;
+    }
+    source = getOption("source");
     tempPath = new Path(getTempPath(), TEMP_FILE + '-' + System.currentTimeMillis());
 
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Mon Feb 13 15:14:18 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.vectorizer.collocations.llr;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -84,7 +85,7 @@ public final class CollocDriver extends 
         + " which will be tokenized using the specified analyzer.");
     addFlag("unigram", "u", "If set, unigrams will be emitted in the final output alongside collocations");
 
-    Map<String, String> argMap = parseArguments(args);
+    Map<String, List<String>> argMap = parseArguments(args);
 
     if (argMap == null) {
       return -1;
@@ -94,45 +95,45 @@ public final class CollocDriver extends 
     Path output = getOutputPath();
 
     int maxNGramSize = DEFAULT_MAX_NGRAM_SIZE;
-    if (argMap.get("--maxNGramSize") != null) {
+    if (hasOption("maxNGramSize")) {
       try {
-        maxNGramSize = Integer.parseInt(argMap.get("--maxNGramSize"));
+        maxNGramSize = Integer.parseInt(getOption("maxNGramSize"));
       } catch (NumberFormatException ex) {
         log.warn("Could not parse ngram size option");
       }
     }
     log.info("Maximum n-gram size is: {}", maxNGramSize);
 
-    if (argMap.containsKey("--overwrite")) {
+    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.delete(getConf(), output);
     }
 
     int minSupport = CollocReducer.DEFAULT_MIN_SUPPORT;
-    if (argMap.get("--minSupport") != null) {
-      minSupport = Integer.parseInt(argMap.get("--minSupport"));
+    if (getOption("minSupport") != null) {
+      minSupport = Integer.parseInt(getOption("minSupport"));
     }
     log.info("Minimum Support value: {}", minSupport);
 
     float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
-    if (argMap.get("--minLLR") != null) {
-      minLLRValue = Float.parseFloat(argMap.get("--minLLR"));
+    if (getOption("minLLR") != null) {
+      minLLRValue = Float.parseFloat(getOption("minLLR"));
     }
     log.info("Minimum LLR value: {}", minLLRValue);
 
     int reduceTasks = DEFAULT_PASS1_NUM_REDUCE_TASKS;
-    if (argMap.get("--maxRed") != null) {
-      reduceTasks = Integer.parseInt(argMap.get("--maxRed"));
+    if (getOption("maxRed") != null) {
+      reduceTasks = Integer.parseInt(getOption("maxRed"));
     }
     log.info("Number of pass1 reduce tasks: {}", reduceTasks);
 
-    boolean emitUnigrams = argMap.containsKey("--emitUnigrams");
+    boolean emitUnigrams = argMap.containsKey("emitUnigrams");
 
-    if (argMap.containsKey("--preprocess")) {
+    if (argMap.containsKey("preprocess")) {
       log.info("Input will be preprocessed");
 
       Class<? extends Analyzer> analyzerClass = DefaultAnalyzer.class;
-      if (argMap.get("--analyzerName") != null) {
-        String className = argMap.get("--analyzerName");
+      if (getOption("analyzerName") != null) {
+        String className = getOption("analyzerName");
         analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
         // try instantiating it, b/c there isn't any point in setting it if
         // you can't instantiate it

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java Mon Feb 13 15:14:18 2012
@@ -20,6 +20,8 @@
 package org.apache.mahout.common;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 
 import com.google.common.collect.Maps;
@@ -36,7 +38,7 @@ public final class AbstractJobTest exten
   
   @Test
   public void testFlag() throws Exception {
-    final Map<String,String> testMap = Maps.newHashMap();
+    final Map<String,List<String>> testMap = Maps.newHashMap();
     
     AbstractJobFactory fact = new AbstractJobFactory() {
       @Override
@@ -46,7 +48,7 @@ public final class AbstractJobTest exten
           public int run(String[] args) throws IOException {
             addFlag("testFlag", "t", "a simple test flag");
             
-            Map<String,String> argMap = parseArguments(args);
+            Map<String,List<String>> argMap = parseArguments(args);
             testMap.clear();
             testMap.putAll(argMap);
             return 1;
@@ -67,7 +69,7 @@ public final class AbstractJobTest exten
   
   @Test
   public void testOptions() throws Exception {
-    final Map<String,String> testMap = Maps.newHashMap();
+    final Map<String,List<String>> testMap = Maps.newHashMap();
     
     AbstractJobFactory fact = new AbstractJobFactory() {
       @Override
@@ -82,7 +84,7 @@ public final class AbstractJobTest exten
             this.addOption("hasDefault", "hd", "option w/ default", "defaultValue");
 
 
-            Map<String,String> argMap = parseArguments(args);
+            Map<String,List<String>> argMap = parseArguments(args);
             if (argMap == null) {
               return -1;
             }
@@ -103,8 +105,8 @@ public final class AbstractJobTest exten
       "--required", "requiredArg"
     });
     assertEquals("0 for no missing required options", 0, ret);
-    assertEquals("requiredArg", testMap.get("--required"));
-    assertEquals("defaultValue", testMap.get("--hasDefault"));
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("defaultValue"), testMap.get("--hasDefault"));
     assertNull(testMap.get("--option"));
     assertNull(testMap.get("--notRequired"));
     assertFalse(testMap.containsKey("--overwrite"));
@@ -129,10 +131,10 @@ public final class AbstractJobTest exten
       "--notRequired", "notRequired"
     });
     assertEquals("0 for no missing required options", 0, ret);
-    assertEquals("requiredArg", testMap.get("--required"));
-    assertEquals("nonDefault", testMap.get("--hasDefault"));
-    assertEquals("optionValue", testMap.get("--option"));
-    assertEquals("notRequired", testMap.get("--notRequired"));
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("nonDefault"), testMap.get("--hasDefault"));
+    assertEquals(Collections.singletonList("optionValue"), testMap.get("--option"));
+    assertEquals(Collections.singletonList("notRequired"), testMap.get("--notRequired"));
     assertTrue(testMap.containsKey("--overwrite"));
     
     ret = ToolRunner.run(fact.getJob(), new String[]{
@@ -143,10 +145,10 @@ public final class AbstractJobTest exten
       "-nr", "notRequired"
     });
     assertEquals("0 for no missing required options", 0, ret);
-    assertEquals("requiredArg", testMap.get("--required"));
-    assertEquals("nonDefault", testMap.get("--hasDefault"));
-    assertEquals("optionValue", testMap.get("--option"));
-    assertEquals("notRequired", testMap.get("--notRequired"));
+    assertEquals(Collections.singletonList("requiredArg"), testMap.get("--required"));
+    assertEquals(Collections.singletonList("nonDefault"), testMap.get("--hasDefault"));
+    assertEquals(Collections.singletonList("optionValue"), testMap.get("--option"));
+    assertEquals(Collections.singletonList("notRequired"), testMap.get("--notRequired"));
     assertTrue(testMap.containsKey("--overwrite"));
     
   }
@@ -164,7 +166,7 @@ public final class AbstractJobTest exten
             addOutputOption();
             
             // arg map should be null if a required option is missing.
-            Map<String, String> argMap = parseArguments(args);
+            Map<String, List<String>> argMap = parseArguments(args);
             
             if (argMap == null) {
               return -1;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1243556&r1=1243555&r2=1243556&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Feb 13 15:14:18 2012
@@ -23,8 +23,11 @@ import com.google.common.io.Files;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.util.HelpFormatter;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -47,20 +50,36 @@ public final class SequenceFileDumper ex
   @Override
   public int run(String[] args) throws Exception {
 
-    addOption("seqFile", "s", "The Sequence File to read in", true);
+    addOption("seqFile", "s", "The Sequence File to read in", false);
+    addOption("seqDirectory", "d", "A directory containing sequence files to read", false);
     addOption(DefaultOptionCreator.outputOption().create());
     addOption("substring", "b", "The number of chars to print out per value", false);
     addOption(buildOption("count", "c", "Report the count only", false, false, null));
     addOption("numItems", "n", "Output at most <n> key value pairs", false);
     addOption(buildOption("facets", "fa", "Output the counts per key.  Note, if there are a lot of unique keys, this can take up a fair amount of memory", false, false, null));
-
+    addOption(buildOption("quiet", "q", "Print only file contents.", false, false, null));
 
     if (parseArguments(args) == null) {
       return -1;
     }
-    Path path = new Path(getOption("seqFile"));
+
+    Path[] pathArr= null;
     Configuration conf = new Configuration();
 
+    if (getOption("seqFile") != null) {
+      pathArr = new Path[1];
+      pathArr[0] = new Path(getOption("seqFile"));
+    } else if (getOption("seqDirectory") != null) {
+      Path dirPath = new Path(getOption("seqDirectory"));
+      FileSystem fs = dirPath.getFileSystem(conf);
+      pathArr = FileUtil.stat2Paths(fs.listStatus(dirPath, new OutputFilesFilter()));
+    }
+    if (pathArr == null) {
+      System.out.println("Must specify --seqFile (-s) or --seqDirectory (-d)!");      
+      return -1;
+    }
+
+
     Writer writer;
     boolean shouldClose;
     if (hasOption("output")) {
@@ -71,62 +90,69 @@ public final class SequenceFileDumper ex
       writer = new OutputStreamWriter(System.out);
     }
     try {
-      writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
-
-      int sub = Integer.MAX_VALUE;
-      if (hasOption("substring")) {
-        sub = Integer.parseInt(getOption("substring"));
-      }
-      boolean countOnly = hasOption("count");
-      SequenceFileIterator<?, ?> iterator = new SequenceFileIterator<Writable, Writable>(path, true, conf);
-      writer.append("Key class: ").append(iterator.getKeyClass().toString());
-      writer.append(" Value Class: ").append(iterator.getValueClass().toString()).append('\n');
-      OpenObjectIntHashMap<String> facets = null;
-      if (hasOption("facets")){
-        facets = new OpenObjectIntHashMap<String>();
-      }
-      long count = 0;
-      if (countOnly) {
-        while (iterator.hasNext()) {
-          Pair<?, ?> record = iterator.next();
-          String key = record.getFirst().toString();
-          if (facets != null){
-            facets.adjustOrPutValue(key, 1, 1);//either insert or add 1
-          }
-          count++;
+      for (Path path : pathArr) {
+        if (!hasOption("quiet"))
+          writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
+
+        int sub = Integer.MAX_VALUE;
+        if (hasOption("substring")) {
+          sub = Integer.parseInt(getOption("substring"));
         }
-        writer.append("Count: ").append(String.valueOf(count)).append('\n');
-      } else {
-        long numItems = Long.MAX_VALUE;
-        if (hasOption("numItems")) {
-          numItems = Long.parseLong(getOption("numItems"));
-          writer.append("Max Items to dump: ").append(String.valueOf(numItems)).append("\n");
+        boolean countOnly = hasOption("count");
+        SequenceFileIterator<?, ?> iterator = new SequenceFileIterator<Writable, Writable>(path, true, conf);
+        if (!hasOption("quiet")) {
+          writer.append("Key class: ").append(iterator.getKeyClass().toString());
+          writer.append(" Value Class: ").append(iterator.getValueClass().toString()).append('\n');
         }
-        while (iterator.hasNext() && count < numItems) {
-          Pair<?, ?> record = iterator.next();
-          String key = record.getFirst().toString();
-          writer.append("Key: ").append(key);
-          String str = record.getSecond().toString();
-          writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
-          writer.write('\n');
-          if (facets != null){
-            facets.adjustOrPutValue(key, 1, 1);//either insert or add 1
+        OpenObjectIntHashMap<String> facets = null;
+        if (hasOption("facets")){
+          facets = new OpenObjectIntHashMap<String>();
+        }
+        long count = 0;
+        if (countOnly) {
+          while (iterator.hasNext()) {
+            Pair<?, ?> record = iterator.next();
+            String key = record.getFirst().toString();
+            if (facets != null){
+              facets.adjustOrPutValue(key, 1, 1);//either insert or add 1
+            }
+            count++;
+          }
+          writer.append("Count: ").append(String.valueOf(count)).append('\n');
+        } else {
+          long numItems = Long.MAX_VALUE;
+          if (hasOption("numItems")) {
+            numItems = Long.parseLong(getOption("numItems"));
+            if (!hasOption("quiet"))
+              writer.append("Max Items to dump: ").append(String.valueOf(numItems)).append("\n");
           }
-          count++;
+          while (iterator.hasNext() && count < numItems) {
+            Pair<?, ?> record = iterator.next();
+            String key = record.getFirst().toString();
+            writer.append("Key: ").append(key);
+            String str = record.getSecond().toString();
+            writer.append(": Value: ").append(str.length() > sub 
+                                              ? str.substring(0, sub) : str);
+            writer.write('\n');
+            if (facets != null){
+              facets.adjustOrPutValue(key, 1, 1);//either insert or add 1
+            }
+            count++;
+          }
+          if (!hasOption("quiet"))
+            writer.append("Count: ").append(String.valueOf(count)).append('\n');
         }
-        writer.append("Count: ").append(String.valueOf(count)).append('\n');
-      }
-      if (facets != null) {
-        List<String> keyList = new ArrayList<String>(facets.size());
-
-        IntArrayList valueList = new IntArrayList(facets.size());
-        facets.pairsSortedByKey(keyList, valueList);
-        writer.append("-----Facets---\n");
-        writer.append("Key\t\tCount\n");
-        int i = 0;
-        for (String key : keyList) {
-          writer.append(key).append("\t\t").append(String.valueOf(valueList.get(i++))).append('\n');
+        if (facets != null) {
+          List<String> keyList = new ArrayList<String>(facets.size());
 
+          IntArrayList valueList = new IntArrayList(facets.size());
+          facets.pairsSortedByKey(keyList, valueList);
+          writer.append("-----Facets---\n");
+          writer.append("Key\t\tCount\n");
+          int i = 0;
+          for (String key : keyList) {
+            writer.append(key).append("\t\t").append(String.valueOf(valueList.get(i++))).append('\n');
+          }
         }
       }
       writer.flush();