You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/08/30 20:41:48 UTC

svn commit: r990892 [1/2] - in /mahout/trunk: buildtools/src/main/resources/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ core/src/main/java/org/apache/mahout/clusterin...

Author: srowen
Date: Mon Aug 30 18:41:46 2010
New Revision: 990892

URL: http://svn.apache.org/viewvc?rev=990892&view=rev
Log:
More assault on PMD / checkstyle warnings

Modified:
    mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java
    mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/etc/findbugs-exclude.xml
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
    mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
    mahout/trunk/maven/src/main/resources/findbugs-exclude.xml
    mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Modified: mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
     <rule ref="rulesets/basic.xml/BooleanInstantiation"/>
     <rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
     <rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
-    <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+    <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
     <rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
     <rule ref="rulesets/basic.xml/EmptyIfStmt"/>
     <rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
     <!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
     <!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
 
-    <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+    <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
     <rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
     <!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
     <!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
     <!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
-    <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+    <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
     <!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
     <rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
     <!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
     <!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
     <!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
     <rule ref="rulesets/naming.xml/NoPackage"/>
     <rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
     <!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
     <!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
     <!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
-    <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+    <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
     <rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
 
     <!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
 
-    <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+    <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
     <rule ref="rulesets/strings.xml/StringInstantiation"/>
     <rule ref="rulesets/strings.xml/StringToString"/>
     <!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Mon Aug 30 18:41:46 2010
@@ -80,24 +80,24 @@ public final class AggregateAndRecommend
 
     FSDataInputStream in = null;
     try {
-        String itemFilePathString = jobConf.get(ITEMS_FILE);        
-        if (itemFilePathString == null) {
-          itemsToRecommendFor = null;
-        } else {
-          Path unqualifiedItemsFilePath = new Path(itemFilePathString);
-          FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
-          itemsToRecommendFor = new FastIDSet();
-          Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
-          in = fs.open(itemsFilePath);
-          for (String line : new FileLineIterable(in)) {
-        	  itemsToRecommendFor.add(Long.parseLong(line));
-          }
+      String itemFilePathString = jobConf.get(ITEMS_FILE);
+      if (itemFilePathString == null) {
+        itemsToRecommendFor = null;
+      } else {
+        Path unqualifiedItemsFilePath = new Path(itemFilePathString);
+        FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
+        itemsToRecommendFor = new FastIDSet();
+        Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
+        in = fs.open(itemsFilePath);
+        for (String line : new FileLineIterable(in)) {
+          itemsToRecommendFor.add(Long.parseLong(line));
         }
-      } catch (IOException ioe) {
-        throw new IllegalStateException(ioe);
-      } finally {
-        IOUtils.closeStream(in);
       }
+    } catch (IOException ioe) {
+      throw new IllegalStateException(ioe);
+    } finally {
+      IOUtils.closeStream(in);
+    }
   }
 
   private static final UnaryFunction ABSOLUTE_VALUES = new UnaryFunction() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Aug 30 18:41:46 2010
@@ -86,12 +86,12 @@ public final class ItemSimilarityJob ext
 
     addInputOption();
     addOutputOption();
-    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use " +
-        "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
-    addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number " +
-        "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
-    addOption("maxCooccurrencesPerItem", "o", "try to cap the number of cooccurrences per item to this number " +
-        "(default: " + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
+    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
+        + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
+    addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number "
+        + "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
+    addOption("maxCooccurrencesPerItem", "o", "try to cap the number of cooccurrences per item to this number "
+        + "(default: " + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
     addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
 
     Map<String,String> parsedArgs = parseArguments(args);
@@ -137,9 +137,9 @@ public final class ItemSimilarityJob ext
                                   VarIntWritable.class,
                                   NullWritable.class,
                                   TextOutputFormat.class);
-        countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
-        countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
-        countUsers.waitForCompletion(true);
+      countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
+      countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
+      countUsers.waitForCompletion(true);
     }
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -178,12 +178,12 @@ public final class ItemSimilarityJob ext
     /* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
      * new DistributedRowMatrix(...).rowSimilarity(...) */
     ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
-        "-Dmapred.input.dir=" + itemUserMatrixPath.toString(),
-        "-Dmapred.output.dir=" + similarityMatrixPath.toString(),
-        "--numberOfColumns", String.valueOf(numberOfUsers),
-        "--similarityClassname", similarityClassName,
-        "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem + 1),
-        "--tempDir", tempDirPath.toString() });
+      "-Dmapred.input.dir=" + itemUserMatrixPath.toString(),
+      "-Dmapred.output.dir=" + similarityMatrixPath.toString(),
+      "--numberOfColumns", String.valueOf(numberOfUsers),
+      "--similarityClassname", similarityClassName,
+      "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem + 1),
+      "--tempDir", tempDirPath.toString() });
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       Job mostSimilarItems = prepareJob(similarityMatrixPath,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Mon Aug 30 18:41:46 2010
@@ -34,10 +34,6 @@ import org.apache.mahout.math.VectorWrit
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * @author jeff
- *
- */
 public class CanopyClusterer {
 
   private static final Logger log = LoggerFactory.getLogger(CanopyClusterer.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Mon Aug 30 18:41:46 2010
@@ -23,7 +23,6 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile.Writer;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.Cluster;
@@ -283,13 +282,11 @@ public class DirichletClusterer {
    * @param vector a VectorWritable holding the Vector
    * @param clusters a List of DirichletClusters
    * @param context a Mapper.Context to emit to
-   * @throws IOException
-   * @throws InterruptedException
    */
   public void emitPointToClusters(VectorWritable vector,
                                   List<DirichletCluster> clusters,
-                                  Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
-      throws IOException, InterruptedException {
+                                  Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+    throws IOException, InterruptedException {
     Vector pi = new DenseVector(clusters.size());
     for (int i = 0; i < clusters.size(); i++) {
       pi.set(i, clusters.get(i).getModel().pdf(vector));
@@ -309,14 +306,12 @@ public class DirichletClusterer {
    * @param clusters a List of DirichletClusters
    * @param pi the normalized pdf Vector for the point
    * @param context a Mapper.Context to emit to
-   * @throws IOException
-   * @throws InterruptedException
    */
   private void emitMostLikelyCluster(VectorWritable point,
                                      Collection<DirichletCluster> clusters,
                                      Vector pi,
-                                     Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
-      throws IOException, InterruptedException {
+                                     Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+    throws IOException, InterruptedException {
     int clusterId = -1;
     double clusterPdf = 0;
     for (int i = 0; i < clusters.size(); i++) {
@@ -336,14 +331,12 @@ public class DirichletClusterer {
    * @param clusters a List of DirichletClusters
    * @param pi the normalized pdf Vector for the point
    * @param context a Mapper.Context to emit to
-   * @throws IOException
-   * @throws InterruptedException
    */
   private void emitAllClusters(VectorWritable point,
                                List<DirichletCluster> clusters,
                                Vector pi,
-                               Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
-      throws IOException, InterruptedException {
+                               Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+    throws IOException, InterruptedException {
     for (int i = 0; i < clusters.size(); i++) {
       double pdf = pi.get(i);
       if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
@@ -359,9 +352,9 @@ public class DirichletClusterer {
    * @param vector a VectorWritable holding the Vector
    * @param clusters a List of DirichletClusters
    * @param writer a SequenceFile.Writer to emit to
-   * @throws IOException
    */
-  public void emitPointToClusters(VectorWritable vector, List<DirichletCluster> clusters, Writer writer) throws IOException {
+  public void emitPointToClusters(VectorWritable vector, List<DirichletCluster> clusters, Writer writer)
+    throws IOException {
     Vector pi = new DenseVector(clusters.size());
     for (int i = 0; i < clusters.size(); i++) {
       pi.set(i, clusters.get(i).getModel().pdf(vector));
@@ -381,9 +374,9 @@ public class DirichletClusterer {
    * @param clusters a List of DirichletClusters
    * @param pi the normalized pdf Vector for the point
    * @param writer a SequenceFile.Writer to emit to
-   * @throws IOException
    */
-  private void emitAllClusters(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer) throws IOException {
+  private void emitAllClusters(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer)
+    throws IOException {
     for (int i = 0; i < clusters.size(); i++) {
       double pdf = pi.get(i);
       if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
@@ -400,10 +393,11 @@ public class DirichletClusterer {
    * @param clusters a List of DirichletClusters
    * @param pi the normalized pdf Vector for the point
    * @param writer a SequenceFile.Writer to emit to
-   * @throws IOException
    */
-  private void emitMostLikelyCluster(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer)
-      throws IOException {
+  private static void emitMostLikelyCluster(VectorWritable vector,
+                                            Collection<DirichletCluster> clusters,
+                                            Vector pi,
+                                            Writer writer) throws IOException {
     double maxPdf = 0;
     int clusterId = -1;
     for (int i = 0; i < clusters.size(); i++) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Mon Aug 30 18:41:46 2010
@@ -83,7 +83,8 @@ public class DirichletDriver extends Abs
   }
 
   @Override
-  public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+  public int run(String[] args)
+    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
       NoSuchMethodException, InvocationTargetException, InterruptedException {
     addInputOption();
     addOutputOption();
@@ -125,7 +126,8 @@ public class DirichletDriver extends Abs
     double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
     double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION));
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
+    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION)
+        .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
     int prototypeSize = readPrototypeSize(input);
 
     AbstractVectorModelDistribution modelDistribution = createModelDistribution(modelFactory,
@@ -217,8 +219,9 @@ public class DirichletDriver extends Abs
                             boolean runClustering,
                             boolean emitMostLikely,
                             double threshold,
-                            boolean runSequential) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
-      IOException, SecurityException, NoSuchMethodException, InvocationTargetException, InterruptedException {
+                            boolean runSequential)
+    throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+      IOException, NoSuchMethodException, InvocationTargetException, InterruptedException {
 
     new DirichletDriver().job(input,
                               output,
@@ -242,9 +245,7 @@ public class DirichletDriver extends Abs
    * @param alpha0 the double alpha_0 argument to the algorithm
    * @return an initialized DirichletState
    */
-  static DirichletState createState(ModelDistribution<VectorWritable> modelDistribution, int numModels, double alpha0)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException, SecurityException, NoSuchMethodException,
-      IllegalArgumentException, InvocationTargetException {
+  static DirichletState createState(ModelDistribution<VectorWritable> modelDistribution, int numModels, double alpha0) {
     return new DirichletState(modelDistribution, numModels, alpha0);
   }
 
@@ -277,18 +278,17 @@ public class DirichletDriver extends Abs
    * @param numModels the int number of models to generate
    * @param alpha0 the double alpha_0 argument to the DirichletDistribution
    */
-  private void writeInitialState(Path output,
-                                 Path stateOut,
-                                 ModelDistribution<VectorWritable> modelDistribution,
-                                 int numModels,
-                                 double alpha0) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
-      IOException, SecurityException, NoSuchMethodException, InvocationTargetException {
+  private static void writeInitialState(Path output,
+                                        Path stateOut,
+                                        ModelDistribution<VectorWritable> modelDistribution,
+                                        int numModels,
+                                        double alpha0) throws IOException {
 
     DirichletState state = createState(modelDistribution, numModels, alpha0);
     writeState(output, stateOut, numModels, state);
   }
 
-  private void writeState(Path output, Path stateOut, int numModels, DirichletState state) throws IOException {
+  private static void writeState(Path output, Path stateOut, int numModels, DirichletState state) throws IOException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(output.toUri(), conf);
     for (int i = 0; i < numModels; i++) {
@@ -310,13 +310,13 @@ public class DirichletDriver extends Abs
    * @param alpha0 alpha_0
    * @param numReducers the number of Reducers desired
    */
-  private void runIteration(Path input,
-                            Path stateIn,
-                            Path stateOut,
-                            ModelDistribution<VectorWritable> modelDistribution,
-                            int numClusters,
-                            double alpha0,
-                            int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
+  private static void runIteration(Path input,
+                                   Path stateIn,
+                                   Path stateOut,
+                                   ModelDistribution<VectorWritable> modelDistribution,
+                                   int numClusters,
+                                   double alpha0,
+                                   int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, stateIn.toString());
     conf.set(MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
@@ -378,7 +378,8 @@ public class DirichletDriver extends Abs
                   boolean runClustering,
                   boolean emitMostLikely,
                   double threshold,
-                  boolean runSequential) throws IOException, InstantiationException, IllegalAccessException,
+                  boolean runSequential)
+    throws IOException, InstantiationException, IllegalAccessException,
       ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException {
     Path clustersOut = buildClusters(input,
                                      output,
@@ -389,7 +390,12 @@ public class DirichletDriver extends Abs
                                      numReducers,
                                      runSequential);
     if (runClustering) {
-      clusterData(input, clustersOut, new Path(output, Cluster.CLUSTERED_POINTS_DIR), emitMostLikely, threshold, runSequential);
+      clusterData(input,
+                  clustersOut,
+                  new Path(output, Cluster.CLUSTERED_POINTS_DIR),
+                  emitMostLikely,
+                  threshold,
+                  runSequential);
     }
   }
 
@@ -413,34 +419,42 @@ public class DirichletDriver extends Abs
    * @param runSequential execute sequentially if true
    * @return the Path of the final clusters directory
    */
-  public Path buildClusters(Path input,
-                            Path output,
-                            ModelDistribution<VectorWritable> modelDistribution,
-                            int numClusters,
-                            int maxIterations,
-                            double alpha0,
-                            int numReducers,
-                            boolean runSequential) throws IOException, InstantiationException, IllegalAccessException,
+  public static Path buildClusters(Path input,
+                                   Path output,
+                                   ModelDistribution<VectorWritable> modelDistribution,
+                                   int numClusters,
+                                   int maxIterations,
+                                   double alpha0,
+                                   int numReducers,
+                                   boolean runSequential)
+    throws IOException, InstantiationException, IllegalAccessException,
       ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException {
     Path clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
     writeInitialState(output, clustersIn, modelDistribution, numClusters, alpha0);
 
     if (runSequential) {
-      clustersIn = buildClustersSeq(input, output, modelDistribution, numClusters, maxIterations, alpha0, numReducers, clustersIn);
+      clustersIn = buildClustersSeq(input, output, modelDistribution, numClusters, maxIterations, alpha0, clustersIn);
     } else {
-      clustersIn = buildClustersMR(input, output, modelDistribution, numClusters, maxIterations, alpha0, numReducers, clustersIn);
+      clustersIn = buildClustersMR(input,
+                                   output,
+                                   modelDistribution,
+                                   numClusters,
+                                   maxIterations,
+                                   alpha0,
+                                   numReducers,
+                                   clustersIn);
     }
     return clustersIn;
   }
 
-  private Path buildClustersSeq(Path input,
-                                Path output,
-                                ModelDistribution<VectorWritable> modelDistribution,
-                                int numClusters,
-                                int maxIterations,
-                                double alpha0,
-                                int numReducers,
-                                Path clustersIn) throws IOException, ClassNotFoundException, InstantiationException,
+  private static Path buildClustersSeq(Path input,
+                                       Path output,
+                                       ModelDistribution<VectorWritable> modelDistribution,
+                                       int numClusters,
+                                       int maxIterations,
+                                       double alpha0,
+                                       Path clustersIn)
+    throws IOException, ClassNotFoundException, InstantiationException,
       IllegalAccessException, NoSuchMethodException, InvocationTargetException {
     for (int iteration = 1; iteration <= maxIterations; iteration++) {
       log.info("Iteration {}", iteration);
@@ -478,14 +492,15 @@ public class DirichletDriver extends Abs
     return clustersIn;
   }
 
-  private Path buildClustersMR(Path input,
-                               Path output,
-                               ModelDistribution<VectorWritable> modelDistribution,
-                               int numClusters,
-                               int maxIterations,
-                               double alpha0,
-                               int numReducers,
-                               Path clustersIn) throws IOException, InterruptedException, ClassNotFoundException {
+  private static Path buildClustersMR(Path input,
+                                      Path output,
+                                      ModelDistribution<VectorWritable> modelDistribution,
+                                      int numClusters,
+                                      int maxIterations,
+                                      double alpha0,
+                                      int numReducers,
+                                      Path clustersIn)
+    throws IOException, InterruptedException, ClassNotFoundException {
     for (int iteration = 1; iteration <= maxIterations; iteration++) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
@@ -512,8 +527,13 @@ public class DirichletDriver extends Abs
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential execute sequentially if true
    */
-  public void clusterData(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold, boolean runSequential)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+  public void clusterData(Path input,
+                          Path stateIn,
+                          Path output,
+                          boolean emitMostLikely,
+                          double threshold,
+                          boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       clusterDataSeq(input, stateIn, output, emitMostLikely, threshold);
     } else {
@@ -521,8 +541,8 @@ public class DirichletDriver extends Abs
     }
   }
 
-  private void clusterDataSeq(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException,
-      InstantiationException, IllegalAccessException {
+  private static void clusterDataSeq(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold)
+    throws IOException, InstantiationException, IllegalAccessException {
     Configuration conf = new Configuration();
     List<DirichletCluster> clusters = DirichletClusterMapper.loadClusters(conf, stateIn);
     DirichletClusterer clusterer = new DirichletClusterer(emitMostLikely, threshold);
@@ -552,8 +572,8 @@ public class DirichletDriver extends Abs
 
   }
 
-  private void clusterDataMR(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException,
-      InterruptedException, ClassNotFoundException {
+  private static void clusterDataMR(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, stateIn.toString());
     conf.set(EMIT_MOST_LIKELY_KEY, Boolean.toString(emitMostLikely));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Mon Aug 30 18:41:46 2010
@@ -83,17 +83,17 @@ public class KMeansDriver extends Abstra
                             int maxIterations,
                             int numReduceTasks,
                             boolean runClustering,
-                            boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
-    new KMeansDriver().job(input,
-                           clustersIn,
-                           output,
-                           measure,
-                           convergenceDelta,
-                           maxIterations,
-                           numReduceTasks,
-                           runClustering,
-                           runSequential);
+                            boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+    job(input,
+        clustersIn,
+        output,
+        measure,
+        convergenceDelta,
+        maxIterations,
+        numReduceTasks,
+        runClustering,
+        runSequential);
   }
 
   @Override
@@ -141,8 +141,17 @@ public class KMeansDriver extends Abstra
           .parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
-    job(input, clusters, output, measure, convergenceDelta, maxIterations, numReduceTasks, runClustering, runSequential);
+    boolean runSequential =
+        getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+    job(input,
+        clusters,
+        output,
+        measure,
+        convergenceDelta,
+        maxIterations,
+        numReduceTasks,
+        runClustering,
+        runSequential);
     return 0;
   }
 
@@ -167,24 +176,24 @@ public class KMeansDriver extends Abstra
    *          true if points are to be clustered after iterations are completed
    * @param runSequential if true execute sequential algorithm
    */
-  public void job(Path input,
-                  Path clustersIn,
-                  Path output,
-                  DistanceMeasure measure,
-                  double convergenceDelta,
-                  int maxIterations,
-                  int numReduceTasks,
-                  boolean runClustering,
-                  boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
-      IllegalAccessException {
+  public static void job(Path input,
+                         Path clustersIn,
+                         Path output,
+                         DistanceMeasure measure,
+                         double convergenceDelta,
+                         int maxIterations,
+                         int numReduceTasks,
+                         boolean runClustering,
+                         boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
 
     // iterate until the clusters converge
     String delta = Double.toString(convergenceDelta);
     if (log.isInfoEnabled()) {
-      log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output,
-          measure.getClass().getName() });
-      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta,
-          maxIterations, numReduceTasks, VectorWritable.class.getName() });
+      log.info("Input: {} Clusters In: {} Out: {} Distance: {}",
+          new Object[] { input, clustersIn, output, measure.getClass().getName() });
+      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}",
+          new Object[] { convergenceDelta, maxIterations, numReduceTasks, VectorWritable.class.getName() });
     }
     Path clustersOut = buildClusters(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta, runSequential);
     if (runClustering) {
@@ -213,29 +222,30 @@ public class KMeansDriver extends Abstra
    *          the convergence delta value
    * @return the Path of the final clusters directory
    */
-  public Path buildClusters(Path input,
-                            Path clustersIn,
-                            Path output,
-                            DistanceMeasure measure,
-                            int maxIterations,
-                            int numReduceTasks,
-                            String delta,
-                            boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+  public static Path buildClusters(Path input,
+                                   Path clustersIn,
+                                   Path output,
+                                   DistanceMeasure measure,
+                                   int maxIterations,
+                                   int numReduceTasks,
+                                   String delta,
+                                   boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
-      return buildClustersSeq(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta);
+      return buildClustersSeq(input, clustersIn, output, measure, maxIterations, delta);
     } else {
       return buildClustersMR(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta);
     }
   }
 
-  private Path buildClustersSeq(Path input,
-                                Path clustersIn,
-                                Path output,
-                                DistanceMeasure measure,
-                                int maxIterations,
-                                int numReduceTasks,
-                                String delta) throws InstantiationException, IllegalAccessException, IOException {
+  private static Path buildClustersSeq(Path input,
+                                       Path clustersIn,
+                                       Path output,
+                                       DistanceMeasure measure,
+                                       int maxIterations,
+                                       String delta)
+    throws InstantiationException, IllegalAccessException, IOException {
+
     KMeansClusterer clusterer = new KMeansClusterer(measure);
     List<Cluster> clusters = new ArrayList<Cluster>();
 
@@ -253,7 +263,7 @@ public class KMeansDriver extends Abstra
       for (FileStatus s : status) {
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
         try {
-          WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
           VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
           while (reader.next(key, vw)) {
             clusterer.addPointToNearestCluster(vw.get(), clusters);
@@ -286,13 +296,15 @@ public class KMeansDriver extends Abstra
     return clustersIn;
   }
 
-  private Path buildClustersMR(Path input,
-                               Path clustersIn,
-                               Path output,
-                               DistanceMeasure measure,
-                               int maxIterations,
-                               int numReduceTasks,
-                               String delta) throws IOException, InterruptedException, ClassNotFoundException {
+  private static Path buildClustersMR(Path input,
+                                      Path clustersIn,
+                                      Path output,
+                                      DistanceMeasure measure,
+                                      int maxIterations,
+                                      int numReduceTasks,
+                                      String delta)
+    throws IOException, InterruptedException, ClassNotFoundException {
+
     boolean converged = false;
     int iteration = 1;
     while (!converged && (iteration <= maxIterations)) {
@@ -324,12 +336,14 @@ public class KMeansDriver extends Abstra
    *          the number of reducer tasks
    * @return true if the iteration successfully runs
    */
-  private boolean runIteration(Path input,
-                               Path clustersIn,
-                               Path clustersOut,
-                               String measureClass,
-                               String convergenceDelta,
-                               int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
+  private static boolean runIteration(Path input,
+                                      Path clustersIn,
+                                      Path clustersOut,
+                                      String measureClass,
+                                      String convergenceDelta,
+                                      int numReduceTasks)
+    throws IOException, InterruptedException, ClassNotFoundException {
+
     Configuration conf = new Configuration();
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
@@ -373,7 +387,7 @@ public class KMeansDriver extends Abstra
    * @throws IOException
    *           if there was an IO error
    */
-  private boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
+  private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
     FileStatus[] parts = fs.listStatus(filePath);
     for (FileStatus part : parts) {
       String name = part.getPath().getName();
@@ -416,27 +430,32 @@ public class KMeansDriver extends Abstra
    *          the convergence delta value
    * @param runSequential if true execute sequential algorithm
    */
-  public void clusterData(Path input,
-                          Path clustersIn,
-                          Path output,
-                          DistanceMeasure measure,
-                          String convergenceDelta,
-                          boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+  public static void clusterData(Path input,
+                                 Path clustersIn,
+                                 Path output,
+                                 DistanceMeasure measure,
+                                 String convergenceDelta,
+                                 boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+
     if (log.isInfoEnabled()) {
       log.info("Running Clustering");
       log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output, measure });
       log.info("convergence: {} Input Vectors: {}", convergenceDelta, VectorWritable.class.getName());
     }
     if (runSequential) {
-      clusterDataSeq(input, clustersIn, output, measure, convergenceDelta);
+      clusterDataSeq(input, clustersIn, output, measure);
     } else {
       clusterDataMR(input, clustersIn, output, measure, convergenceDelta);
     }
   }
 
-  private void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta)
-      throws IOException, InterruptedException, InstantiationException, IllegalAccessException {
+  private static void clusterDataSeq(Path input,
+                                     Path clustersIn,
+                                     Path output,
+                                     DistanceMeasure measure)
+    throws IOException, InterruptedException, InstantiationException, IllegalAccessException {
+
     KMeansClusterer clusterer = new KMeansClusterer(measure);
     List<Cluster> clusters = new ArrayList<Cluster>();
     KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
@@ -469,8 +488,13 @@ public class KMeansDriver extends Abstra
 
   }
 
-  private void clusterDataMR(Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void clusterDataMR(Path input,
+                                    Path clustersIn,
+                                    Path output,
+                                    DistanceMeasure measure,
+                                    String convergenceDelta)
+    throws IOException, InterruptedException, ClassNotFoundException {
+
     Configuration conf = new Configuration();
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon Aug 30 18:41:46 2010
@@ -36,7 +36,12 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.util.Tool;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
@@ -68,10 +73,6 @@ import org.slf4j.LoggerFactory;
  * <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
  * arguments.</p>
  */
-/**
- * @author jeff
- *
- */
 public abstract class AbstractJob extends Configured implements Tool {
 
   private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);
@@ -321,7 +322,7 @@ public abstract class AbstractJob extend
    *   specified or outputOption is present and neither <code>--output</code> 
    *   nor <code>-Dmapred.output.dir</code> are specified.
    */
-  protected void parseDirectories(CommandLine cmdLine) throws IllegalArgumentException {
+  protected void parseDirectories(CommandLine cmdLine) {
 
     Configuration conf = getConf();
 
@@ -340,13 +341,13 @@ public abstract class AbstractJob extend
     }
 
     if (inputOption != null && inputPath == null) {
-      throw new IllegalArgumentException("No input specified: " + inputOption.getPreferredName() + " or -Dmapred.input.dir "
-          + "must be provided to specify input directory");
+      throw new IllegalArgumentException("No input specified: " + inputOption.getPreferredName()
+          + " or -Dmapred.input.dir must be provided to specify input directory");
     }
 
     if (outputOption != null && outputPath == null) {
-      throw new IllegalArgumentException("No output specified: " + outputOption.getPreferredName() + " or -Dmapred.output.dir "
-          + "must be provided to specify output directory");
+      throw new IllegalArgumentException("No output specified: " + outputOption.getPreferredName()
+          + " or -Dmapred.output.dir must be provided to specify output directory");
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Mon Aug 30 18:41:46 2010
@@ -262,9 +262,11 @@ public final class DataLoader {
    * @param values
    *          used to convert CATEGORICAL attributes to Integer
    */
+  /*
   private static Data constructData(Attribute[] attrs, List<Instance> vectors, List<String>[] values) {
     Dataset dataset = new Dataset(attrs, values, vectors.size());
     
     return new Data(dataset, vectors);
   }
+   */
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Mon Aug 30 18:41:46 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.df.data;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.List;
 import java.util.Locale;
 import java.util.StringTokenizer;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java Mon Aug 30 18:41:46 2010
@@ -34,17 +34,14 @@ public class ParallelFPGrowthCombiner ex
     Reducer<LongWritable,TransactionTree,LongWritable,TransactionTree> {
   
   @Override
-  protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException,
-                                                                                            InterruptedException {
+  protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context)
+    throws IOException, InterruptedException {
     TransactionTree cTree = new TransactionTree();
-    int count = 0;
-    int node = 0;
     for (TransactionTree tr : values) {
       Iterator<Pair<List<Integer>,Long>> it = tr.getIterator();
       while (it.hasNext()) {
         Pair<List<Integer>,Long> p = it.next();
-        node += cTree.addPattern(p.getFirst(), p.getSecond());
-        count++;
+        cTree.addPattern(p.getFirst(), p.getSecond());
       }
     }
     

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java Mon Aug 30 18:41:46 2010
@@ -77,10 +77,8 @@ public abstract class MahoutTestCase ext
     Path testTempDirPath = getTestTempDirPath();
     Path tempFileOrDir = fs.makeQualified(new Path(testTempDirPath, name));
     fs.deleteOnExit(tempFileOrDir);
-    if (dir) {
-      if (!fs.mkdirs(tempFileOrDir)) {
-        throw new IOException("Could not create " + tempFileOrDir);
-      }
+    if (dir && !fs.mkdirs(tempFileOrDir)) {
+      throw new IOException("Could not create " + tempFileOrDir);
     }
     return tempFileOrDir;
   }
@@ -103,7 +101,7 @@ public abstract class MahoutTestCase ext
   /**
    * find a declared field in a class or one of its super classes
    */
-  private Field findDeclaredField(Class<?> inClass, String fieldname) throws NoSuchFieldException {
+  private static Field findDeclaredField(Class<?> inClass, String fieldname) throws NoSuchFieldException {
     if (Object.class.equals(inClass)) {
       throw new NoSuchFieldException();
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java Mon Aug 30 18:41:46 2010
@@ -30,7 +30,7 @@ public class StringUtilsTest extends Mah
       if (this == obj) {
         return true;
       }
-      if (obj == null || !(obj instanceof DummyTest)) {
+      if (!(obj instanceof DummyTest)) {
         return false;
       }
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java Mon Aug 30 18:41:46 2010
@@ -46,7 +46,7 @@ public class PartialBuilderTest extends 
   private static final int NUM_TREES = 32;
 
   /** instances per partition */
-  private static final int numInstances = 20;
+  private static final int NUM_INSTANCES = 20;
   
   public void testProcessOutput() throws Exception {
     Configuration conf = new Configuration();
@@ -130,13 +130,13 @@ public class PartialBuilderTest extends 
         Node tree = new Leaf(rng.nextInt(100));
 
         keys[index] = new TreeID(partition, treeId);
-        values[index] = new MapredOutput(tree, nextIntArray(rng, numInstances));
+        values[index] = new MapredOutput(tree, nextIntArray(rng, NUM_INSTANCES));
 
         index++;
       }
       
       firstIds[p] = firstId;
-      firstId += numInstances;
+      firstId += NUM_INSTANCES;
     }
 
   }
@@ -210,13 +210,13 @@ public class PartialBuilderTest extends 
 
     @Override
     public void prediction(int treeId, int instanceId, int prediction) {
-      int partition = instanceId / numInstances;
+      int partition = instanceId / NUM_INSTANCES;
 
       TreeID key = new TreeID(partition, treeId);
       int index = ArrayUtils.indexOf(keys, key);
       assertTrue("key not found", index >= 0);
 
-      assertEquals(values[index].getPredictions()[instanceId % numInstances],
+      assertEquals(values[index].getPredictions()[instanceId % NUM_INSTANCES],
           prediction);
     }
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java Mon Aug 30 18:41:46 2010
@@ -43,7 +43,7 @@ public class DummyCandidate {
     if (this == obj) {
       return true;
     }
-    if (obj == null || !(obj instanceof DummyCandidate)) {
+    if (!(obj instanceof DummyCandidate)) {
       return false;
     }
 

Modified: mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
     <rule ref="rulesets/basic.xml/BooleanInstantiation"/>
     <rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
     <rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
-    <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+    <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
     <rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
     <rule ref="rulesets/basic.xml/EmptyIfStmt"/>
     <rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
     <!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
     <!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
 
-    <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+    <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
     <rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
     <!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
     <!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
     <!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
-    <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+    <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
     <!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
     <rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
     <!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
     <!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
     <!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
     <rule ref="rulesets/naming.xml/NoPackage"/>
     <rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
     <!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
     <!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
     <!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
-    <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+    <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
     <rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
 
     <!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
 
-    <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+    <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
     <rule ref="rulesets/strings.xml/StringInstantiation"/>
     <rule ref="rulesets/strings.xml/StringToString"/>
     <!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->

Modified: mahout/trunk/etc/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/etc/findbugs-exclude.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/etc/findbugs-exclude.xml (original)
+++ mahout/trunk/etc/findbugs-exclude.xml Mon Aug 30 18:41:46 2010
@@ -10,18 +10,21 @@
     <Bug pattern="SE_NO_SERIALVERSIONID"/>
   </Match>
   <Match>
+    <Bug pattern="EI_EXPOSE_REP"/>
+  </Match>
+  <Match>
     <Bug pattern="EI_EXPOSE_REP2"/>
- </Match>
- <Match>
+  </Match>
+  <Match>
     <Bug pattern="SIC_INNER_SHOULD_BE_STATIC_ANON"/>
- </Match>
- <Match>
+  </Match>
+  <Match>
     <Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"/>
   </Match>
- <Match>
+  <Match>
     <Bug pattern="SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"/>
   </Match>
- <Match>
+  <Match>
     <Bug pattern="SE_BAD_FIELD"/>
   </Match>
   <Match>

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 30 18:41:46 2010
@@ -62,7 +62,6 @@ public final class WikipediaDatasetCreat
    * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
    * {@link org.apache.hadoop.io.SequenceFile}</li>
    * </ol>
-   * @throws InterruptedException 
    */
   public static void main(String[] args) throws IOException, InterruptedException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -144,14 +143,13 @@ public final class WikipediaDatasetCreat
    * @param exactMatchOnly
    *          if true, then the Wikipedia category must match exactly instead of simply containing the
    *          category string
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   public static void runJob(String input,
                             String output,
                             String catFile,
                             boolean exactMatchOnly,
-                            Class<? extends Analyzer> analyzerClass) throws IOException, InterruptedException, ClassNotFoundException {
+                            Class<? extends Analyzer> analyzerClass)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set("key.value.separator.in.input.line", " ");
     conf.set("xmlinput.start", "<text xml:space=\"preserve\">");

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java Mon Aug 30 18:41:46 2010
@@ -18,11 +18,26 @@
 package org.apache.mahout.classifier.sgd;
 
 import com.google.common.collect.Maps;
-import com.google.gson.*;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.InstanceCreator;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
 import org.apache.mahout.math.DenseMatrix;
 import org.apache.mahout.math.Matrix;
 
-import java.io.*;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Writer;
 import java.lang.reflect.Type;
 import java.util.Iterator;
 import java.util.List;
@@ -39,14 +54,11 @@ public class LogisticModelParameters {
   private int numFeatures;
   private boolean useBias;
   private int maxTargetCategories;
-  private List<String> targetCategories = null;
+  private List<String> targetCategories;
   private double lambda;
   private double learningRate;
-  private transient CsvRecordFactory csv = null;
-  private OnlineLogisticRegression lr = null;
-
-  public LogisticModelParameters() {
-  }
+  private transient CsvRecordFactory csv;
+  private OnlineLogisticRegression lr;
 
   /**
    * Returns a CsvRecordFactory compatible with this logistic model.  The reason that this is tied
@@ -78,12 +90,14 @@ public class LogisticModelParameters {
       lr = new OnlineLogisticRegression(getMaxTargetCategories(), getNumFeatures(), new L1())
               .lambda(getLambda())
               .learningRate(getLearningRate())
-              .alpha(1 - 1e-3);
+              .alpha(1 - 1.0e-3);
     }
     return lr;
   }
 
-  public static void saveModel(Writer out, OnlineLogisticRegression model, List<String> targetCategories) throws IOException {
+  public static void saveModel(Writer out,
+                               OnlineLogisticRegression model,
+                               List<String> targetCategories) throws IOException {
     LogisticModelParameters x = new LogisticModelParameters();
     x.setTargetCategories(targetCategories);
     x.setLambda(model.getLambda());
@@ -134,7 +148,7 @@ public class LogisticModelParameters {
    * @throws IOException If there is an error opening or closing the file.
    */
   public static LogisticModelParameters loadFrom(File in) throws IOException {
-    FileReader input = new FileReader(in);
+    InputStreamReader input = new FileReader(in);
     LogisticModelParameters r = loadFrom(input);
     input.close();
     return r;
@@ -147,17 +161,17 @@ public class LogisticModelParameters {
    * @param predictorList The list of variable names.
    * @param typeList      The list of types in the format preferred by CsvRecordFactory.
    */
-  public void setTypeMap(List predictorList, List typeList) {
+  public void setTypeMap(List<String> predictorList, List<String> typeList) {
     typeMap = Maps.newHashMap();
-    if (typeList.size() == 0) {
+    if (typeList.isEmpty()) {
       throw new IllegalArgumentException("Must have at least one type specifier");
     }
-    Iterator iTypes = typeList.iterator();
+    Iterator<String> iTypes = typeList.iterator();
     String lastType = null;
     for (Object x : predictorList) {
       // type list can be short .. we just repeat last spec
       if (iTypes.hasNext()) {
-        lastType = iTypes.next().toString();
+        lastType = iTypes.next();
       }
       typeMap.put(x.toString(), lastType);
     }
@@ -234,7 +248,8 @@ public class LogisticModelParameters {
    * Tells GSON how to (de)serialize a Mahout matrix.  We assume on deserialization that
    * the matrix is dense.
    */
-  public static class MatrixTypeAdapter implements JsonDeserializer<Matrix>, JsonSerializer<Matrix>, InstanceCreator<Matrix> {
+  public static class MatrixTypeAdapter
+    implements JsonDeserializer<Matrix>, JsonSerializer<Matrix>, InstanceCreator<Matrix> {
     @Override
     public JsonElement serialize(Matrix m, Type type, JsonSerializationContext jsonSerializationContext) {
       JsonObject r = new JsonObject();
@@ -253,7 +268,7 @@ public class LogisticModelParameters {
     }
 
     @Override
-    public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) throws JsonParseException {
+    public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) {
       JsonObject data = x.getAsJsonObject();
       Matrix r = new DenseMatrix(data.get("rows").getAsInt(), data.get("cols").getAsInt());
       int i = 0;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java Mon Aug 30 18:41:46 2010
@@ -29,23 +29,21 @@ import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.classifier.evaluation.Auc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 
-/**
- *
- */
 public class RunLogistic {
-  private static final Logger log = LoggerFactory.getLogger(RunLogistic.class);
+
   private static String inputFile;
   private static String modelFile;
-  private static boolean showAuc = false;
-  private static boolean showScores = false;
-  private static boolean showConfusion = false;
+  private static boolean showAuc;
+  private static boolean showScores;
+  private static boolean showConfusion;
+
+  private RunLogistic() {
+  }
 
   public static void main(String[] args) throws IOException {
     if (parseArgs(args)) {
@@ -82,15 +80,17 @@ public class RunLogistic {
       }
       if (showConfusion) {
         Matrix m = collector.confusion();
-        System.out.printf("confusion: [[%.1f, %.1f], [%.1f, %.1f]]\n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
+        System.out.printf("confusion: [[%.1f, %.1f], [%.1f, %.1f]]\n",
+            m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
         m = collector.entropy();
-        System.out.printf("entropy: [[%.1f, %.1f], [%.1f, %.1f]]\n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
+        System.out.printf("entropy: [[%.1f, %.1f], [%.1f, %.1f]]\n",
+            m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
       }
     }
   }
 
   private static boolean parseArgs(String[] args) {
-        DefaultOptionBuilder builder = new DefaultOptionBuilder();
+    DefaultOptionBuilder builder = new DefaultOptionBuilder();
 
     Option help = builder.withLongName("help").withDescription("print this list").create();
 
@@ -102,13 +102,13 @@ public class RunLogistic {
     Option scores = builder.withLongName("scores").withDescription("print scores").create();
 
     ArgumentBuilder argumentBuilder = new ArgumentBuilder();
-    Option inputFile = builder.withLongName("input")
+    Option inputFileOption = builder.withLongName("input")
             .withRequired(true)
             .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
             .withDescription("where to get training data")
             .create();
 
-    Option modelFile = builder.withLongName("model")
+    Option modelFileOption = builder.withLongName("model")
             .withRequired(true)
             .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
             .withDescription("where to get a model")
@@ -120,8 +120,8 @@ public class RunLogistic {
             .withOption(auc)
             .withOption(scores)
             .withOption(confusion)
-            .withOption(inputFile)
-            .withOption(modelFile)
+            .withOption(inputFileOption)
+            .withOption(modelFileOption)
             .create();
 
     Parser parser = new Parser();
@@ -129,18 +129,17 @@ public class RunLogistic {
     parser.setHelpTrigger("--help");
     parser.setGroup(normalArgs);
     parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
-    CommandLine cmdLine;
-    cmdLine = parser.parseAndHelp(args);
+    CommandLine cmdLine = parser.parseAndHelp(args);
 
     if (cmdLine == null) {
       return false;
     }
 
-    RunLogistic.inputFile = getStringArgument(cmdLine, inputFile);
-    RunLogistic.modelFile = getStringArgument(cmdLine, modelFile);
-    RunLogistic.showAuc = getBooleanArgument(cmdLine, auc);
-    RunLogistic.showScores = getBooleanArgument(cmdLine, scores);
-    RunLogistic.showConfusion = getBooleanArgument(cmdLine, confusion);
+    inputFile = getStringArgument(cmdLine, inputFileOption);
+    modelFile = getStringArgument(cmdLine, modelFileOption);
+    showAuc = getBooleanArgument(cmdLine, auc);
+    showScores = getBooleanArgument(cmdLine, scores);
+    showConfusion = getBooleanArgument(cmdLine, confusion);
 
     return true;
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Mon Aug 30 18:41:46 2010
@@ -226,8 +226,7 @@ public class TrainLogistic {
     parser.setHelpTrigger("--help");
     parser.setGroup(normalArgs);
     parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
-    CommandLine cmdLine;
-    cmdLine = parser.parseAndHelp(args);
+    CommandLine cmdLine = parser.parseAndHelp(args);
 
     if (cmdLine == null) {
       return false;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Mon Aug 30 18:41:46 2010
@@ -30,6 +30,7 @@ import java.awt.geom.Ellipse2D;
 import java.awt.geom.Rectangle2D;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -58,14 +59,15 @@ public class DisplayClustering extends F
 
   protected static final int SIZE = 8; // screen size in inches
 
-  private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
+  private static final Collection<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
 
   protected static final List<VectorWritable> SAMPLE_DATA = new ArrayList<VectorWritable>();
 
   protected static final List<List<Cluster>> CLUSTERS = new ArrayList<List<Cluster>>();
 
-  protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
-      Color.lightGray };
+  protected static final Color[] COLORS = {
+      Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta, Color.lightGray
+  };
 
   protected static final double T1 = 3.0;
 
@@ -166,7 +168,7 @@ public class DisplayClustering extends F
    *          a Vector of rectangle dimensions
    */
   protected static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
-    double[] flip = { 1, -1 };
+    double[] flip = {1, -1};
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
     int h = SIZE / 2;
@@ -186,7 +188,7 @@ public class DisplayClustering extends F
    *          a Vector of ellipse dimensions
    */
   protected static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
-    double[] flip = { 1, -1 };
+    double[] flip = {1, -1};
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
     int h = SIZE / 2;
@@ -220,7 +222,7 @@ public class DisplayClustering extends F
    *          double standard deviation of the samples
    */
   protected static void generateSamples(int num, double mx, double my, double sd) {
-    double[] params = { mx, my, sd, sd };
+    double[] params = {mx, my, sd, sd};
     SAMPLE_PARAMS.add(new DenseVector(params));
     log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
     for (int i = 0; i < num; i++) {
@@ -242,7 +244,8 @@ public class DisplayClustering extends F
     }
   }
 
-  protected static List<Cluster> readClusters(Path clustersIn) throws IOException, InstantiationException, IllegalAccessException {
+  protected static List<Cluster> readClusters(Path clustersIn)
+    throws IOException, InstantiationException, IllegalAccessException {
     List<Cluster> clusters = new ArrayList<Cluster>();
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
@@ -254,8 +257,12 @@ public class DisplayClustering extends F
         Writable value = (Writable) reader.getValueClass().newInstance();
         while (reader.next(key, value)) {
           Cluster cluster = (Cluster) value;
-          log.info("Reading Cluster:" + cluster.getId() + " center:" + AbstractCluster.formatVector(cluster.getCenter(), null)
-            + " numPoints:" + cluster.getNumPoints() + " radius:" + AbstractCluster.formatVector(cluster.getRadius(), null));
+          log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", new Object[] {
+              cluster.getId(),
+              AbstractCluster.formatVector(cluster.getCenter(), null),
+              cluster.getNumPoints(),
+              AbstractCluster.formatVector(cluster.getRadius(), null)
+          });
           clusters.add(cluster);
           value = (Writable) reader.getValueClass().newInstance();
         }
@@ -291,7 +298,7 @@ public class DisplayClustering extends F
    *          double y-value standard deviation of the samples
    */
   protected static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
-    double[] params = { mx, my, sdx, sdy };
+    double[] params = {mx, my, sdx, sdy};
     SAMPLE_PARAMS.add(new DenseVector(params));
     log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
     for (int i = 0; i < num; i++) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Mon Aug 30 18:41:46 2010
@@ -53,8 +53,7 @@ public final class Job extends FuzzyKMea
       log.info("Running with default arguments");
       Path output = new Path("output");
       HadoopUtil.overwriteOutput(output);
-      new Job().job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, 1, (float) 2, 0.5, true);
-
+      job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, 1, (float) 2, 0.5);
     }
   }
 
@@ -66,7 +65,8 @@ public final class Job extends FuzzyKMea
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
     addOption(DefaultOptionCreator.clustersInOption()
         .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
-            + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+            + "If k is also specified, then a random set of vectors will be selected"
+            + " and written out to this path first")
         .create());
     addOption(DefaultOptionCreator.numClustersOption()
         .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
@@ -112,7 +112,7 @@ public final class Job extends FuzzyKMea
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
     double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
     double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
-    job(input, output, measure, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta, runClustering);
+    job(input, output, measure, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta);
     return 0;
   }
 
@@ -128,7 +128,7 @@ public final class Job extends FuzzyKMea
    *          the String denoting the input directory path
    * @param output
    *          the String denoting the output directory path
-   * @param measureClass
+   * @param t1
    *          the canopy T1 threshold
    * @param t2
    *          the canopy T2 threshold
@@ -140,20 +140,17 @@ public final class Job extends FuzzyKMea
    *          the float "m" fuzziness coefficient
    * @param convergenceDelta
    *          the double convergence criteria for iterations
-   * @param runClustering
-   *          the int maximum number of iterations
    */
-  private void job(Path input,
-                   Path output,
-                   DistanceMeasure measure,
-                   double t1,
-                   double t2,
-                   int maxIterations,
-                   int numReducerTasks,
-                   float fuzziness,
-                   double convergenceDelta,
-                   boolean runClustering) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
-      ClassNotFoundException {
+  private static void job(Path input,
+                          Path output,
+                          DistanceMeasure measure,
+                          double t1,
+                          double t2,
+                          int maxIterations,
+                          int numReducerTasks,
+                          float fuzziness,
+                          double convergenceDelta)
+    throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
 
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     log.info("Preparing Input");

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Mon Aug 30 18:41:46 2010
@@ -210,7 +210,7 @@ public class TestForest extends Configur
 
     if (dataFS.getFileStatus(dataPath).isDir()) {
       //the input is a directory of files
-      testDirectory(dataPath, outputPath, converter, forest, dataset, analyzer, rng);
+      testDirectory(outputPath, converter, forest, dataset, analyzer, rng);
     }  else {
       // the input is one single file
       testFile(dataPath, outputPath, converter, forest, dataset, analyzer, rng);
@@ -224,7 +224,7 @@ public class TestForest extends Configur
     }
   }
 
-  private void testDirectory(Path inPath, Path outPath, DataConverter converter, DecisionForest forest, Dataset dataset,
+  private void testDirectory(Path outPath, DataConverter converter, DecisionForest forest, Dataset dataset,
                         ResultAnalyzer analyzer, Random rng) throws IOException {
     Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java Mon Aug 30 18:41:46 2010
@@ -86,7 +86,7 @@ public class CDFitness implements Writab
     if (this == obj) {
       return true;
     }
-    if (obj == null || !(obj instanceof CDFitness)) {
+    if (!(obj instanceof CDFitness)) {
       return false;
     }