You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/24 21:05:55 UTC

svn commit: r1460431 [2/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/common/ core/src/main/java/org/apache/mahout/cf/taste/eval/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste...

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java Sun Mar 24 20:05:50 2013
@@ -27,6 +27,8 @@ import org.apache.mahout.ep.State;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.stats.OnlineAuc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -36,7 +38,8 @@ import java.util.Locale;
 import java.util.concurrent.ExecutionException;
 
 /**
- * This is a meta-learner that maintains a pool of ordinary {@link org.apache.mahout.classifier.sgd.OnlineLogisticRegression} learners. Each
+ * This is a meta-learner that maintains a pool of ordinary
+ * {@link org.apache.mahout.classifier.sgd.OnlineLogisticRegression} learners. Each
  * member of the pool has different learning rates.  Whichever of the learners in the pool falls
  * behind in terms of average log-likelihood will be tossed out and replaced with variants of the
  * survivors.  This will let us automatically derive an annealing schedule that optimizes learning
@@ -45,8 +48,9 @@ import java.util.concurrent.ExecutionExc
  * learn also decreases the number of learning rate parameters required and replaces the normal
  * hyper-parameter search.
  * <p/>
- * One wrinkle is that the pool of learners that we maintain is actually a pool of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner}
- * which themselves contain several OnlineLogisticRegression objects.  These pools allow estimation
+ * One wrinkle is that the pool of learners that we maintain is actually a pool of
+ * {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} which themselves contain several OnlineLogisticRegression
+ * objects.  These pools allow estimation
  * of performance on the fly even if we make many passes through the data.  This does, however,
  * increase the cost of training since if we are using 5-fold cross-validation, each vector is used
  * 4 times for training and once for classification.  If this becomes a problem, then we should
@@ -85,8 +89,9 @@ public class AdaptiveLogisticRegression 
 
   private boolean freezeSurvivors = true;
 
-  public AdaptiveLogisticRegression() {
-  }
+  private static final Logger log = LoggerFactory.getLogger(AdaptiveLogisticRegression.class);
+
+  public AdaptiveLogisticRegression() {}
 
   /**
    * Uses {@link #DEFAULT_THREAD_COUNT} and {@link #DEFAULT_POOL_SIZE}
@@ -108,7 +113,8 @@ public class AdaptiveLogisticRegression 
    * @param threadCount The number of threads to use for training
    * @param poolSize The number of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} to use.
    */
-  public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior, int threadCount, int poolSize) {
+  public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior, int threadCount,
+      int poolSize) {
     this.numFeatures = numFeatures;
     this.threadCount = threadCount;
     this.poolSize = poolSize;
@@ -164,6 +170,7 @@ public class AdaptiveLogisticRegression 
       });
     } catch (InterruptedException e) {
       // ignore ... shouldn't happen
+      log.warn("Ignoring exception", e);
     } catch (ExecutionException e) {
       throw new IllegalStateException(e.getCause());
     }
@@ -229,7 +236,7 @@ public class AdaptiveLogisticRegression 
       });
       ep.close();
     } catch (InterruptedException e) {
-      // ignore
+      log.warn("Ignoring exception", e);
     } catch (ExecutionException e) {
       throw new IllegalStateException(e);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java Sun Mar 24 20:05:50 2013
@@ -304,7 +304,7 @@ public class CsvRecordFactory implements
    * @return the raw target label
    */  
   public String getTargetLabel(int code) {
-    for (String key: targetDictionary.values()) {
+    for (String key : targetDictionary.values()) {
       if (targetDictionary.intern(key) == code) {
         return key;
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java Sun Mar 24 20:05:50 2013
@@ -20,4 +20,4 @@
  * These classes currently implement a form of feature hashing with
  * multiple probes to limit feature ambiguity.</p>
  */
-package org.apache.mahout.classifier.sgd;
\ No newline at end of file
+package org.apache.mahout.classifier.sgd;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,21 +17,21 @@
 
 package org.apache.mahout.clustering.canopy;
 
-public interface CanopyConfigKeys {
+public final class CanopyConfigKeys {
 
-  String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
+  private CanopyConfigKeys() {}
 
-  String CANOPY_PATH_KEY = "org.apache.mahout.clustering.canopy.path";
+  public static final String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
 
-  String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
+  public static final String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
 
-  String T3_KEY = "org.apache.mahout.clustering.canopy.t3";
+  public static final String T3_KEY = "org.apache.mahout.clustering.canopy.t3";
 
-  String T4_KEY = "org.apache.mahout.clustering.canopy.t4";
+  public static final String T4_KEY = "org.apache.mahout.clustering.canopy.t4";
 
   // keys used by Driver, Mapper, Combiner & Reducer
-  String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
+  public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
 
-  String CF_KEY = "org.apache.mahout.clustering.canopy.canopyFilter";
+  public static final String CF_KEY = "org.apache.mahout.clustering.canopy.canopyFilter";
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Sun Mar 24 20:05:50 2013
@@ -279,7 +279,7 @@ public class CanopyDriver extends Abstra
       clusterer.addPointToCanopies(vw.get(), canopies);
     }
 
-    Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0'+ Cluster.FINAL_ITERATION_SUFFIX);
+    Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
     Path path = new Path(canopyOutputDir, "part-r-00000");
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
         Text.class, ClusterWritable.class);
@@ -295,8 +295,8 @@ public class CanopyDriver extends Abstra
                     AbstractCluster.formatVector(canopy.getRadius(), null));
         }
         if (canopy.getNumObservations() > clusterFilter) {
-        	clusterWritable.setValue(canopy);
-        	writer.append(new Text(canopy.getIdentifier()), clusterWritable);
+          clusterWritable.setValue(canopy);
+          writer.append(new Text(canopy.getIdentifier()), clusterWritable);
         }
       }
     } finally {
@@ -375,5 +375,5 @@ public class CanopyDriver extends Abstra
                                     new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
                                     clusterClassificationThreshold, true, runSequential);
   }
-  
+
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Sun Mar 24 20:05:50 2013
@@ -50,7 +50,7 @@ public class CanopyReducer extends Reduc
       ClusterWritable clusterWritable = new ClusterWritable();
       canopy.computeParameters();
       if (canopy.getNumObservations() > clusterFilter) {
-    	clusterWritable.setValue(canopy);
+      clusterWritable.setValue(canopy);
         context.write(new Text(canopy.getIdentifier()), clusterWritable);
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Sun Mar 24 20:05:50 2013
@@ -228,8 +228,8 @@ public class DirichletDriver extends Abs
       int numModels, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException,
       InterruptedException, ClassNotFoundException {
     ClusterClassifier.writePolicy(new DirichletClusteringPolicy(numModels, alpha0), stateIn);
-    ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold,
-        emitMostLikely, runSequential);
+    ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+        threshold, emitMostLikely, runSequential);
   }
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,18 +17,10 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-public interface FuzzyKMeansConfigKeys {
+public final class FuzzyKMeansConfigKeys {
 
-  String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
+  private FuzzyKMeansConfigKeys() {}
 
-  String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
-
-  String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
-
-  String M_KEY = "org.apache.mahout.clustering.fuzzykmeans.m";
-
-  String EMIT_MOST_LIKELY_KEY = "org.apache.mahout.clustering.fuzzykmeans.emitMostLikely";
-
-  String THRESHOLD_KEY = "org.apache.mahout.clustering.fuzzykmeans.threshold";
+  public static final  String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -268,7 +268,7 @@ public class FuzzyKMeansDriver extends A
     List<Cluster> clusters = Lists.newArrayList();
     FuzzyKMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
     
-    if (conf==null) {
+    if (conf == null) {
       conf = new Configuration();
     }
     
@@ -320,7 +320,7 @@ public class FuzzyKMeansDriver extends A
     throws IOException, ClassNotFoundException, InterruptedException {
     
     ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn);
-    ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely,
-        runSequential);
+    ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+        threshold, emitMostLikely, runSequential);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java Sun Mar 24 20:05:50 2013
@@ -63,4 +63,4 @@ public interface ClusteringPolicy extend
    */
   void close(ClusterClassifier posterior);
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -20,12 +20,11 @@ package org.apache.mahout.clustering.kme
 /**
  * This class holds all config keys that are relevant to be used in the KMeans MapReduce configuration.
  * */
-public interface KMeansConfigKeys {
+public final class KMeansConfigKeys {
+
+  private KMeansConfigKeys() {}
+
   /** Configuration key for distance measure to use. */
-  String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
-  /** Configuration key for convergence threshold. */
-  String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
-  /** Configuration key for iteration cluster path */
-  String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
+  public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -142,7 +142,8 @@ public class KMeansDriver extends Abstra
     if (log.isInfoEnabled()) {
       log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output,
                measure.getClass().getName());
-      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", convergenceDelta, maxIterations, VectorWritable.class.getName());
+      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", convergenceDelta,
+          maxIterations, VectorWritable.class.getName());
     }
     Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
     if (runClustering) {
@@ -259,4 +260,4 @@ public class KMeansDriver extends Abstra
         clusterClassificationThreshold, true, runSequential);
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java Sun Mar 24 20:05:50 2013
@@ -2,4 +2,4 @@
  * This package provides an implementation of the <a href="http://en.wikipedia.org/wiki/Kmeans">k-means</a> clustering
  * algorithm.
  */
-package org.apache.mahout.clustering.kmeans;
\ No newline at end of file
+package org.apache.mahout.clustering.kmeans;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java Sun Mar 24 20:05:50 2013
@@ -31,7 +31,7 @@ public class CVB0DocInferenceMapper exte
   public void map(IntWritable docId, VectorWritable doc, Context context)
       throws IOException, InterruptedException {
     int numTopics = getNumTopics();
-    Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 /numTopics);
+    Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 / numTopics);
     Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size());
     int maxIters = getMaxIters();
     ModelTrainer modelTrainer = getModelTrainer();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Sun Mar 24 20:05:50 2013
@@ -273,7 +273,8 @@ public class CVB0Driver extends Abstract
         }
         log.info("Backfilling perplexity at iteration {}", i);
         if (!fs.exists(modelPath)) {
-          log.error("Model path '{}' does not exist; Skipping iteration {} perplexity calculation", modelPath.toString(), i);
+          log.error("Model path '{}' does not exist; Skipping iteration {} perplexity calculation",
+              modelPath.toString(), i);
           continue;
         }
         perplexity = calculatePerplexity(conf, inputPath, modelPath, i);
@@ -308,7 +309,8 @@ public class CVB0Driver extends Abstract
       if (testFraction > 0 && iterationNumber % iterationBlockSize == 0) {
         perplexities.add(calculatePerplexity(conf, inputPath, modelOutputPath, iterationNumber));
         log.info("Current perplexity = {}", perplexities.get(perplexities.size() - 1));
-        log.info("(p_{} - p_{}) / p_0 = {}; target = {}", iterationNumber, iterationNumber - iterationBlockSize, rateOfChange(perplexities), convergenceDelta);
+        log.info("(p_{} - p_{}) / p_0 = {}; target = {}", iterationNumber, iterationNumber - iterationBlockSize,
+            rateOfChange(perplexities), convergenceDelta);
       }
     }
     log.info("Completed {} iterations in {} seconds", iterationNumber,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java Sun Mar 24 20:05:50 2013
@@ -110,9 +110,9 @@ public class CachingCVB0Mapper
 
   @Override
   public void map(IntWritable docId, VectorWritable document, Context context)
-      throws IOException, InterruptedException{
+      throws IOException, InterruptedException {
     /* where to get docTopics? */
-    Vector topicVector = new DenseVector(new double[numTopics]).assign(1.0/numTopics);
+    Vector topicVector = new DenseVector(new double[numTopics]).assign(1.0 / numTopics);
     modelTrainer.train(document.get(), topicVector, true, maxIters);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java Sun Mar 24 20:05:50 2013
@@ -95,7 +95,7 @@ public class CachingCVB0PerplexityMapper
 
   @Override
   public void map(IntWritable docId, VectorWritable document, Context context)
-      throws IOException, InterruptedException{
+      throws IOException, InterruptedException {
     if (testFraction < 1.0f && random.nextFloat() >= testFraction) {
       return;
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Sun Mar 24 20:05:50 2013
@@ -121,7 +121,7 @@ public class InMemoryCollapsedVariationa
     numTerms = terms != null ? terms.length : corpus.numCols();
     Map<String, Integer> termIdMap = Maps.newHashMap();
     if (terms != null) {
-      for (int t=0; t<terms.length; t++) {
+      for (int t = 0; t < terms.length; t++) {
         termIdMap.put(terms[t], t);
       }
     }
@@ -134,7 +134,7 @@ public class InMemoryCollapsedVariationa
   private void postInitCorpus() {
     totalCorpusWeight = 0;
     int numNonZero = 0;
-    for (int i=0; i<numDocuments; i++) {
+    for (int i = 0; i < numDocuments; i++) {
       Vector v = corpusWeights.viewRow(i);
       double norm;
       if (v != null && (norm = v.norm(1)) != 0) {
@@ -148,8 +148,7 @@ public class InMemoryCollapsedVariationa
 
   private void initializeModel() {
     TopicModel topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
-                                           numUpdatingThreads,
-                                           initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
+        numUpdatingThreads, initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
     topicModel.setConf(getConf());
 
     TopicModel updatedModel = initialModelCorpusFraction == 0
@@ -157,7 +156,7 @@ public class InMemoryCollapsedVariationa
         : topicModel;
     updatedModel.setConf(getConf());
     docTopicCounts = new DenseMatrix(numDocuments, numTopics);
-    docTopicCounts.assign(1.0/numTopics);
+    docTopicCounts.assign(1.0 / numTopics);
     modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
   }
 
@@ -179,8 +178,8 @@ public class InMemoryCollapsedVariationa
     long start = System.nanoTime();
     modelTrainer.start();
     for (int docId = 0; docId < corpusWeights.numRows(); docId++) {
-      if (testFraction == 0 || docId % (1/testFraction) != 0) {
-        Vector docTopics = new DenseVector(numTopics).assign(1.0/numTopics); // docTopicCounts.getRow(docId)
+      if (testFraction == 0 || docId % (1 / testFraction) != 0) {
+        Vector docTopics = new DenseVector(numTopics).assign(1.0 / numTopics); // docTopicCounts.getRow(docId)
         modelTrainer.trainSync(corpusWeights.viewRow(docId), docTopics , true, 10);
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java Sun Mar 24 20:05:50 2013
@@ -124,7 +124,7 @@ public class ModelTrainer {
       int docId = docSlice.index();
       Vector document = docSlice.vector();
       Vector topicDist = topicSlice.vector();
-      if (testFraction == 0 || docId % (1/testFraction) == 0) {
+      if (testFraction == 0 || docId % (1 / testFraction) == 0) {
         trainSync(document, topicDist, false, 10);
         perplexity += readModel.perplexity(document, topicDist);
         matrixNorm += document.norm(1);
@@ -166,7 +166,7 @@ public class ModelTrainer {
         train(document, topicDist, true, numDocTopicIters);
         if (log.isDebugEnabled()) {
           times[i % times.length] =
-              (System.nanoTime() - start) /(1.0e6 * document.getNumNondefaultElements());
+              (System.nanoTime() - start) / (1.0e6 * document.getNumNondefaultElements());
           if (i % 100 == 0) {
             long time = System.nanoTime() - startTime;
             log.debug("trained {} documents in {}ms", i, time / 1.0e6);
@@ -257,7 +257,7 @@ public class ModelTrainer {
     readModel.persist(outputPath, true);
   }
 
-  private static class TrainerRunnable implements Runnable, Callable<Double> {
+  private static final class TrainerRunnable implements Runnable, Callable<Double> {
     private final TopicModel readModel;
     private final TopicModel writeModel;
     private final Vector document;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Sun Mar 24 20:05:50 2013
@@ -282,7 +282,7 @@ public class TopicModel implements Confi
       topics.set(x, docTopicModel.viewRow(x).norm(1));
     }
     // now renormalize so that sum_x(p(x|doc)) = 1
-    topics.assign(Functions.mult(1/topics.norm(1)));
+    topics.assign(Functions.mult(1 / topics.norm(1)));
   }
 
   public Vector infer(Vector original, Vector docTopics) {
@@ -357,7 +357,8 @@ public class TopicModel implements Confi
         int termIndex = e.index();
 
         // calc un-normalized p(topic x | term a, document i)
-        double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha) / (topicSum + eta * numTerms);
+        double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha) /
+            (topicSum + eta * numTerms);
         termTopicRow.set(termIndex, termTopicLikelihood);
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java Sun Mar 24 20:05:50 2013
@@ -42,7 +42,7 @@ public class MeanShiftCanopyClusterMappe
   protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
     throws IOException, InterruptedException {
     // canopies use canopyIds assigned when input vectors are processed as vectorIds too
-	MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+  MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
     int vectorId = canopy.getId();
     for (MeanShiftCanopy msc : canopies) {
       for (int containedId : msc.getBoundPoints().toList()) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,15 +17,17 @@
 
 package org.apache.mahout.clustering.meanshift;
 
-public interface MeanShiftCanopyConfigKeys {
+public final class MeanShiftCanopyConfigKeys {
 
-	// keys used by Driver, Mapper, Combiner & Reducer
-	String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
-	String KERNEL_PROFILE_KEY = "org.apache.mahout.clustering.canopy.kernelprofile";
-	String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
-	String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
-	String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
-	String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
-	String CLUSTER_POINTS_KEY = "org.apache.mahout.clustering.meanshift.clusterPointsKey";
+  private MeanShiftCanopyConfigKeys() {}
+
+  // keys used by Driver, Mapper, Combiner & Reducer
+  public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
+  public static final String KERNEL_PROFILE_KEY = "org.apache.mahout.clustering.canopy.kernelprofile";
+  public static final String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
+  public static final String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
+  public static final String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
+  public static final String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
+  public static final String CLUSTER_POINTS_KEY = "org.apache.mahout.clustering.meanshift.clusterPointsKey";
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sun Mar 24 20:05:50 2013
@@ -215,10 +215,10 @@ public class MeanShiftCanopyDriver exten
         for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(
             s.getPath(), conf)) {
           MeanShiftCanopy initialCanopy = MeanShiftCanopy.initialCanopy(value.get(),
-				      id++, measure);
+              id++, measure);
           ClusterWritable clusterWritable = new ClusterWritable();
           clusterWritable.setValue(initialCanopy);
-		  writer.append(new Text(), clusterWritable);
+      writer.append(new Text(), clusterWritable);
         }
       } finally {
         Closeables.closeQuietly(writer);
@@ -308,8 +308,8 @@ public class MeanShiftCanopyDriver exten
     FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
     for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(
         clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
-    	MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
-	    clusterer.mergeCanopy(canopy, clusters);
+      MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+      clusterer.mergeCanopy(canopy, clusters);
     }
     boolean[] converged = { false };
     int iteration = 1;
@@ -340,8 +340,8 @@ public class MeanShiftCanopyDriver exten
       clustersIn = clustersOut;
       iteration++;
     }
-    Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1));
-    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + "-final");
+    Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + "-final");
     FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn);
     return finalClustersIn;
   }
@@ -379,8 +379,8 @@ public class MeanShiftCanopyDriver exten
         conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers));
       }
     }
-    Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1));
-    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+    Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
     FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn);
     return finalClustersIn;
   }
@@ -476,7 +476,7 @@ public class MeanShiftCanopyDriver exten
     for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(
         clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
       MeanShiftCanopy cluster = (MeanShiftCanopy) clusterWritable.getValue();
-	  clusters.add(cluster);
+    clusters.add(cluster);
     }
     // iterate over all points, assigning each to the closest canopy and
     // outputting that clustering
@@ -491,7 +491,7 @@ public class MeanShiftCanopyDriver exten
         for (Pair<Writable, ClusterWritable> record : new SequenceFileIterable<Writable, ClusterWritable>(
             s.getPath(), conf)) {
           ClusterWritable clusterWritable = record.getSecond();
-		  MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
+      MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
           MeanShiftCanopy closest = MeanShiftCanopyClusterer
               .findCoveringCanopy(canopy, clusters);
           writer.append(new IntWritable(closest.getId()),

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Sun Mar 24 20:05:50 2013
@@ -30,7 +30,7 @@ import org.apache.mahout.clustering.iter
 import com.google.common.collect.Lists;
 
 public class MeanShiftCanopyReducer extends Reducer<Text,ClusterWritable,Text,ClusterWritable> {
-  
+
   private final Collection<MeanShiftCanopy> canopies = Lists.newArrayList();
   private MeanShiftCanopyClusterer clusterer;
   private boolean allConverged = true;
@@ -45,10 +45,10 @@ public class MeanShiftCanopyReducer exte
   protected void reduce(Text key, Iterable<ClusterWritable> values, Context context)
     throws IOException, InterruptedException {
     for (ClusterWritable clusterWritable : values) {
-    	MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
-	    clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
+      MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+      clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
     }
-    
+
     for (MeanShiftCanopy canopy : canopies) {
       boolean converged = clusterer.shiftToMean(canopy);
       if (converged) {
@@ -59,7 +59,7 @@ public class MeanShiftCanopyReducer exte
       clusterWritable.setValue(canopy);
       context.write(new Text(canopy.getIdentifier()), clusterWritable);
     }
-    
+
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java Sun Mar 24 20:05:50 2013
@@ -10,4 +10,4 @@
  *
  * <p>Output of each clustering algorithm is either a hard or soft assignment of items to clusters.</p>
  */
-package org.apache.mahout.clustering;
\ No newline at end of file
+package org.apache.mahout.clustering;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Sun Mar 24 20:05:50 2013
@@ -135,7 +135,7 @@ public class EigencutsDriver extends Abs
       DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
       U.setConf(new Configuration(conf));
       List<Double> eigenValues = Lists.newArrayList();
-      for (int i=0; i<eigenrank; i++) {
+      for (int i = 0; i < eigenrank; i++) {
         eigenValues.set(i, state.getSingularValue(i));
       }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java Sun Mar 24 20:05:50 2013
@@ -20,65 +20,67 @@ package org.apache.mahout.clustering.spe
 /**
  * Configuration keys for the Eigencuts algorithm (analogous to KMeansConfigKeys)
  */
-public interface EigencutsKeys {
+public final class EigencutsKeys {
+
+  private EigencutsKeys() {}
 
   /**
    * B_0, or the user-specified minimum eigenflow half-life threshold
    * for an eigenvector/eigenvalue pair to be considered. Increasing
    * B_0 equates to fewer clusters
    */
-  String BETA = "org.apache.mahout.clustering.spectral.beta";
+  public static final String BETA = "org.apache.mahout.clustering.spectral.beta";
 
   /**
    * Tau, or the user-specified threshold for making cuts (setting edge
    * affinities to 0) after performing non-maximal suppression on edge weight
    * sensitivies. Increasing tau equates to more edge cuts
    */
-  String TAU = "org.apache.mahout.clustering.spectral.tau";
+  public static final String TAU = "org.apache.mahout.clustering.spectral.tau";
 
   /**
    * The normalization factor for computing the cut threshold
    */
-  String DELTA = "org.apache.mahout.clustering.spectral.delta";
+  public static final String DELTA = "org.apache.mahout.clustering.spectral.delta";
 
   /**
    * Epsilon, or the user-specified coefficient that works in tandem with
    * MINIMUM_HALF_LIFE to determine which eigenvector/eigenvalue pairs to use.
    * Increasing epsilon equates to fewer eigenvector/eigenvalue pairs
    */
-  String EPSILON = "org.apache.mahout.clustering.spectral.epsilon";
+  public static final String EPSILON = "org.apache.mahout.clustering.spectral.epsilon";
 
   /**
    * Base path to the location on HDFS where the diagonal matrix (a vector)
    * and the list of eigenvalues will be stored for one of the map/reduce
    * jobs in Eigencuts.
    */
-  String VECTOR_CACHE_BASE = "org.apache.mahout.clustering.spectral.eigencuts.vectorcache";
+  public static final String VECTOR_CACHE_BASE = "org.apache.mahout.clustering.spectral.eigencuts.vectorcache";
 
   /**
    * Refers to the dimensions of the raw affinity matrix input. Since this
    * matrix is symmetrical, it is a square matrix, hence all its dimensions
    * are equal.
    */
-  String AFFINITY_DIMENSIONS = "org.apache.mahout.clustering.spectral.eigencuts.affinitydimensions";
+  public static final String AFFINITY_DIMENSIONS = "org.apache.mahout.clustering.spectral.eigencuts.affinitydimensions";
 
   /**
    * Refers to the Path to the SequenceFile representing the affinity matrix
    */
-  String AFFINITY_PATH = "org.apache.mahout.clustering.spectral.eigencuts.affinitypath";
+  public static final String AFFINITY_PATH = "org.apache.mahout.clustering.spectral.eigencuts.affinitypath";
 
   /**
    * Refers to the Path to the SequenceFile representing the cut matrix
    */
-  String CUTMATRIX_PATH = "org.apache.mahout.clustering.spectral.eigencuts.cutmatrixpath";
+  public static final String CUTMATRIX_PATH = "org.apache.mahout.clustering.spectral.eigencuts.cutmatrixpath";
 
   /**
    * Sets the SequenceFile index for the list of eigenvalues.
    */
-  int EIGENVALUES_CACHE_INDEX = 0;
+  public static final int EIGENVALUES_CACHE_INDEX = 0;
 
   /**
    * Sets the SequenceFile index for the diagonal matrix.
    */
-  int DIAGONAL_CACHE_INDEX = 1;
+  public static final int DIAGONAL_CACHE_INDEX = 1;
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -50,83 +50,83 @@ import org.apache.mahout.math.hadoop.sto
  */
 public class SpectralKMeansDriver extends AbstractJob {
 
-	public static final double OVERSHOOTMULTIPLIER = 2.0;
-	public static final int REDUCERS = 10;
-	public static final int BLOCKHEIGHT = 30000;
-	public static final int OVERSAMPLING = 15;
-	public static final int POWERITERS = 0;
-
-	public static void main(String[] args) throws Exception {
-		ToolRunner.run(new SpectralKMeansDriver(), args);
-	}
-  
-	@Override
-	public int run(String[] arg0)
-			throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
-    
-		Configuration conf = getConf();
-		addInputOption();
-		addOutputOption();
-		addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
-		addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
-		addOption(DefaultOptionCreator.distanceMeasureOption().create());
-		addOption(DefaultOptionCreator.convergenceOption().create());
-		addOption(DefaultOptionCreator.maxIterationsOption().create());
-		addOption(DefaultOptionCreator.overwriteOption().create());
-		addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
-		addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
-		addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
-		addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
-		addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
-		
-		Map<String, List<String>> parsedArgs = parseArguments(arg0);
-		if (parsedArgs == null) {
-		  return 0;
-		}
-		
-		Path input = getInputPath();
-		Path output = getOutputPath();
-		if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
-			HadoopUtil.delete(conf, output);
-		}
-		int numDims = Integer.parseInt(getOption("dimensions"));
-		int clusters = Integer.parseInt(getOption("clusters"));
-		String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
-		DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
-		double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
-		int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
-		
-		Path tempdir = new Path(getOption("tempDir"));
-		boolean ssvd = parsedArgs.containsKey("--usessvd");
-		if (ssvd) {
-		    int reducers = Integer.parseInt(getOption("reduceTasks"));
-		    int blockheight = Integer.parseInt(getOption("outerProdBlockHeight"));
-		    int oversampling = Integer.parseInt(getOption("oversampling"));
-		    int poweriters = Integer.parseInt(getOption("powerIter"));
-		    run(conf, input, output, numDims, clusters, measure, convergenceDelta,
-		            maxIterations, tempdir, true, reducers, blockheight, oversampling, poweriters);
-		} else {
-		    run(conf, input, output, numDims, clusters, measure, convergenceDelta,
-		            maxIterations, tempdir, false);
-		}
-		
-		return 0;
-	}
-	
-	public static void run(
-	        Configuration conf,
-	        Path input,
-	        Path output,
-	        int numDims,
-	        int clusters,
-	        DistanceMeasure measure,
-	        double convergenceDelta,
-	        int maxIterations,
-	        Path tempDir,
-	        boolean ssvd) throws IOException, InterruptedException, ClassNotFoundException {
-	    run(conf, input, output, numDims, clusters, measure, convergenceDelta,
-	            maxIterations, tempDir, ssvd, REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
-	}
+  public static final double OVERSHOOTMULTIPLIER = 2.0;
+  public static final int REDUCERS = 10;
+  public static final int BLOCKHEIGHT = 30000;
+  public static final int OVERSAMPLING = 15;
+  public static final int POWERITERS = 0;
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new SpectralKMeansDriver(), args);
+  }
+
+  @Override
+  public int run(String[] arg0)
+      throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
+
+    Configuration conf = getConf();
+    addInputOption();
+    addOutputOption();
+    addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
+    addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
+    addOption(DefaultOptionCreator.distanceMeasureOption().create());
+    addOption(DefaultOptionCreator.convergenceOption().create());
+    addOption(DefaultOptionCreator.maxIterationsOption().create());
+    addOption(DefaultOptionCreator.overwriteOption().create());
+    addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
+    addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
+    addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
+    addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
+    addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
+
+    Map<String, List<String>> parsedArgs = parseArguments(arg0);
+    if (parsedArgs == null) {
+      return 0;
+    }
+
+    Path input = getInputPath();
+    Path output = getOutputPath();
+    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
+      HadoopUtil.delete(conf, output);
+    }
+    int numDims = Integer.parseInt(getOption("dimensions"));
+    int clusters = Integer.parseInt(getOption("clusters"));
+    String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
+    DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
+    double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
+    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
+
+    Path tempdir = new Path(getOption("tempDir"));
+    boolean ssvd = parsedArgs.containsKey("--usessvd");
+    if (ssvd) {
+        int reducers = Integer.parseInt(getOption("reduceTasks"));
+        int blockheight = Integer.parseInt(getOption("outerProdBlockHeight"));
+        int oversampling = Integer.parseInt(getOption("oversampling"));
+        int poweriters = Integer.parseInt(getOption("powerIter"));
+        run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+                maxIterations, tempdir, true, reducers, blockheight, oversampling, poweriters);
+    } else {
+        run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+                maxIterations, tempdir, false);
+    }
+
+    return 0;
+  }
+
+  public static void run(
+          Configuration conf,
+          Path input,
+          Path output,
+          int numDims,
+          int clusters,
+          DistanceMeasure measure,
+          double convergenceDelta,
+          int maxIterations,
+          Path tempDir,
+          boolean ssvd) throws IOException, InterruptedException, ClassNotFoundException {
+      run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+              maxIterations, tempDir, ssvd, REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
+  }
 
   /**
    * Run the Spectral KMeans clustering on the supplied arguments
@@ -146,125 +146,125 @@ public class SpectralKMeansDriver extend
    * @param oversampling
    * @param poweriters
    */
-	public static void run(
-		  Configuration conf,
-		  Path input,
-		  Path output,
-		  int numDims,
-		  int clusters,
-		  DistanceMeasure measure,
-		  double convergenceDelta,
-		  int maxIterations,
-		  Path tempDir,
-		  boolean ssvd,
-		  int numReducers,
-		  int blockHeight,
-		  int oversampling,
-		  int poweriters)
-				  throws IOException, InterruptedException, ClassNotFoundException {
-    
-		Path outputCalc = new Path(tempDir, "calculations");
-		Path outputTmp = new Path(tempDir, "temporary");
-
-		// Take in the raw CSV text file and split it ourselves,
-		// creating our own SequenceFiles for the matrices to read later 
-		// (similar to the style of syntheticcontrol.canopy.InputMapper)
-		Path affSeqFiles = new Path(outputCalc, "seqfile");
-		AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);
-		
-		// Construct the affinity matrix using the newly-created sequence files
-		DistributedRowMatrix A = 
-				new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims); 
-		
-		Configuration depConf = new Configuration(conf);
-		A.setConf(depConf);
-		
-		// Construct the diagonal matrix D (represented as a vector)
-		Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
-		
-		//Calculate the normalized Laplacian of the form: L = D^(-0.5)AD^(-0.5)
-		DistributedRowMatrix L = VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
-				new Path(outputCalc, "laplacian"), new Path(outputCalc, outputCalc));
-		L.setConf(depConf);
-		
-		Path data;
-		
-		if (ssvd) {
-			// SSVD requires an array of Paths to function. So we pass in an array of length one
-			Path [] LPath = new Path[1];
-			LPath[0] = L.getRowPath();
-			
-			Path SSVDout = new Path(outputCalc, "SSVD");
-			
-			SSVDSolver solveIt = new SSVDSolver(
-					depConf, 
-					LPath, 
-					SSVDout, 
-					blockHeight,
-					clusters, 
-					oversampling, 
-					numReducers);
-			
-			solveIt.setComputeV(false); 
-			solveIt.setComputeU(true);
-			solveIt.setOverwrite(true);
-			solveIt.setQ(poweriters);
-			//solveIt.setBroadcast(false);
-			solveIt.run();
-			data = new Path(solveIt.getUPath());
-		} else {
-			// Perform eigen-decomposition using LanczosSolver
-			// since some of the eigen-output is spurious and will be eliminated
-			// upon verification, we have to aim to overshoot and then discard
-			// unnecessary vectors later
-			int overshoot = Math.min((int) ((double) clusters * OVERSHOOTMULTIPLIER), numDims);
-			DistributedLanczosSolver solver = new DistributedLanczosSolver();
-			LanczosState state = new LanczosState(L, overshoot, solver.getInitialVector(L));
-			Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");
-			
-			solver.runJob(conf,
-			              state,
-			              overshoot,
-			              true,
-			              lanczosSeqFiles.toString());
-			
-			// perform a verification
-			EigenVerificationJob verifier = new EigenVerificationJob();
-			Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
-			verifier.runJob(conf, 
-							lanczosSeqFiles, 
-							L.getRowPath(), 
-							verifiedEigensPath, 
-							true, 
-							1.0, 
-							clusters);
-			
-			Path cleanedEigens = verifier.getCleanedEigensPath();
-			DistributedRowMatrix W = new DistributedRowMatrix(
-					cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
-			W.setConf(depConf);
-			DistributedRowMatrix Wtrans = W.transpose();
-			data = Wtrans.getRowPath();
-		}
-		
-		// Normalize the rows of Wt to unit length
-		// normalize is important because it reduces the occurrence of two unique clusters  combining into one 
-		Path unitVectors = new Path(outputCalc, "unitvectors");
-		
-		UnitVectorizerJob.runJob(data, unitVectors);
-		
-		DistributedRowMatrix Wt = new DistributedRowMatrix(
-				unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
-		Wt.setConf(depConf);
-		data = Wt.getRowPath();
-		
-		// Generate random initial clusters
-		Path initialclusters = RandomSeedGenerator.buildRandom(conf, data,
-				new Path(output, Cluster.INITIAL_CLUSTERS_DIR), clusters, measure);
-		   
-		    // Run the KMeansDriver
-		Path answer = new Path(output, "kmeans_out");
-		KMeansDriver.run(conf, data, initialclusters, answer,
-				measure,convergenceDelta, maxIterations, true, 0.0, false);
+  public static void run(
+      Configuration conf,
+      Path input,
+      Path output,
+      int numDims,
+      int clusters,
+      DistanceMeasure measure,
+      double convergenceDelta,
+      int maxIterations,
+      Path tempDir,
+      boolean ssvd,
+      int numReducers,
+      int blockHeight,
+      int oversampling,
+      int poweriters)
+          throws IOException, InterruptedException, ClassNotFoundException {
+
+    Path outputCalc = new Path(tempDir, "calculations");
+    Path outputTmp = new Path(tempDir, "temporary");
+
+    // Take in the raw CSV text file and split it ourselves,
+    // creating our own SequenceFiles for the matrices to read later
+    // (similar to the style of syntheticcontrol.canopy.InputMapper)
+    Path affSeqFiles = new Path(outputCalc, "seqfile");
+    AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);
+
+    // Construct the affinity matrix using the newly-created sequence files
+    DistributedRowMatrix A =
+        new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims);
+
+    Configuration depConf = new Configuration(conf);
+    A.setConf(depConf);
+
+    // Construct the diagonal matrix D (represented as a vector)
+    Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
+
+    // Calculate the normalized Laplacian of the form: L = D^(-0.5)AD^(-0.5)
+    DistributedRowMatrix L = VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
+        new Path(outputCalc, "laplacian"), new Path(outputCalc, outputCalc));
+    L.setConf(depConf);
+
+    Path data;
+
+    if (ssvd) {
+      // SSVD requires an array of Paths to function. So we pass in an array of length one
+      Path [] LPath = new Path[1];
+      LPath[0] = L.getRowPath();
+
+      Path SSVDout = new Path(outputCalc, "SSVD");
+
+      SSVDSolver solveIt = new SSVDSolver(
+          depConf,
+          LPath,
+          SSVDout,
+          blockHeight,
+          clusters,
+          oversampling,
+          numReducers);
+
+      solveIt.setComputeV(false);
+      solveIt.setComputeU(true);
+      solveIt.setOverwrite(true);
+      solveIt.setQ(poweriters);
+      //solveIt.setBroadcast(false);
+      solveIt.run();
+      data = new Path(solveIt.getUPath());
+    } else {
+      // Perform eigen-decomposition using LanczosSolver
+      // since some of the eigen-output is spurious and will be eliminated
+      // upon verification, we have to aim to overshoot and then discard
+      // unnecessary vectors later
+      int overshoot = Math.min((int) ((double) clusters * OVERSHOOTMULTIPLIER), numDims);
+      DistributedLanczosSolver solver = new DistributedLanczosSolver();
+      LanczosState state = new LanczosState(L, overshoot, solver.getInitialVector(L));
+      Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");
+
+      solver.runJob(conf,
+                    state,
+                    overshoot,
+                    true,
+                    lanczosSeqFiles.toString());
+
+      // perform a verification
+      EigenVerificationJob verifier = new EigenVerificationJob();
+      Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
+      verifier.runJob(conf,
+              lanczosSeqFiles,
+              L.getRowPath(),
+              verifiedEigensPath,
+              true,
+              1.0,
+              clusters);
+
+      Path cleanedEigens = verifier.getCleanedEigensPath();
+      DistributedRowMatrix W = new DistributedRowMatrix(
+          cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
+      W.setConf(depConf);
+      DistributedRowMatrix Wtrans = W.transpose();
+      data = Wtrans.getRowPath();
+    }
+
+    // Normalize the rows of Wt to unit length
+    // Normalization is important because it reduces the occurrence of two unique clusters combining into one
+    Path unitVectors = new Path(outputCalc, "unitvectors");
+
+    UnitVectorizerJob.runJob(data, unitVectors);
+
+    DistributedRowMatrix Wt = new DistributedRowMatrix(
+        unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
+    Wt.setConf(depConf);
+    data = Wt.getRowPath();
+
+    // Generate random initial clusters
+    Path initialclusters = RandomSeedGenerator.buildRandom(conf, data,
+        new Path(output, Cluster.INITIAL_CLUSTERS_DIR), clusters, measure);
+
+    // Run the KMeansDriver
+    Path answer = new Path(output, "kmeans_out");
+    KMeansDriver.run(conf, data, initialclusters, answer,
+        measure,convergenceDelta, maxIterations, true, 0.0, false);
     }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java Sun Mar 24 20:05:50 2013
@@ -91,4 +91,4 @@ public final class PathDirectory {
     return new Path(clusterPostProcessorOutput + File.separator + clusterId);
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java Sun Mar 24 20:05:50 2013
@@ -63,4 +63,4 @@ public final class ClusterCountReader {
     return numberOfClusters;
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Sun Mar 24 20:05:50 2013
@@ -104,7 +104,7 @@ public final class ClusterOutputPostProc
   private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
     Writer writer = findWriterForVector(clusterId);
     postProcessedClusterDirectories.put(clusterId,
-                                        PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));
+        PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));
     writeVectorToCluster(writer, point);
   }
   
@@ -141,4 +141,4 @@ public final class ClusterOutputPostProc
     this.clusteredPoints = clusteredPoints;
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java Sun Mar 24 20:05:50 2013
@@ -42,7 +42,7 @@ import org.apache.mahout.math.VectorWrit
  * used for top down clustering. It can also be used if the clustering output needs to be grouped into their
  * respective clusters.
  */
-public class ClusterOutputPostProcessorDriver extends AbstractJob {
+public final class ClusterOutputPostProcessorDriver extends AbstractJob {
   
   /**
    * CLI to run clustering post processor. The input to post processor is the ouput path specified to the

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Sun Mar 24 20:05:50 2013
@@ -316,8 +316,10 @@ public abstract class AbstractJob extend
   /**
    *
    * @param args  The args to parse
-   * @param inputOptional if false, then the input option, if set, need not be present.  If true and input is an option and there is no input, then throw an error
-   * @param outputOptional if false, then the output option, if set, need not be present.  If true and output is an option and there is no output, then throw an error
+   * @param inputOptional if false, then the input option, if set, need not be present.  If true and input is an option
+   *                      and there is no input, then throw an error
+   * @param outputOptional if false, then the output option, if set, need not be present.  If true and output is an
+   *                       option and there is no output, then throw an error
    * @return the args parsed into a map.
    */
   public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Sun Mar 24 20:05:50 2013
@@ -98,8 +98,10 @@ public final class HadoopUtil {
    * @param outputPath The output {@link org.apache.hadoop.fs.Path}
    * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
    * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
-   * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op, this value may be null
-   * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op, this value may be null
+   * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
+   *                  this value may be null
+   * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
+   *                    this value may be null
    * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
    * @param reducerKey The reducer key class.
    * @param reducerValue The reducer value class.
@@ -109,7 +111,8 @@ public final class HadoopUtil {
    * @throws IOException if there is a problem with the IO.
    *
    * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
-   * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, org.apache.hadoop.conf.Configuration)
+   * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
+   * org.apache.hadoop.conf.Configuration)
    */
   public static Job prepareJob(Path inputPath,
                            Path outputPath,
@@ -203,7 +206,9 @@ public final class HadoopUtil {
   }
 
   /**
-   * Count all the records in a directory using a {@link org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator}
+   * Count all the records in a directory using a
+   * {@link org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator}
+   *
    * @param path The {@link org.apache.hadoop.fs.Path} to count
    * @param pt The {@link org.apache.mahout.common.iterator.sequencefile.PathType}
    * @param filter Apply the {@link org.apache.hadoop.fs.PathFilter}.  May be null
@@ -226,7 +231,8 @@ public final class HadoopUtil {
     return fs.open(path.makeQualified(fs));
   }
 
-  public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
+  public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter,
+      Comparator<FileStatus> ordering, Configuration conf) throws IOException {
     FileStatus[] statuses;
     FileSystem fs = path.getFileSystem(conf);
     if (filter == null) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java Sun Mar 24 20:05:50 2013
@@ -77,4 +77,4 @@ public final class LongPair implements C
     }
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java Sun Mar 24 20:05:50 2013
@@ -76,4 +76,4 @@ public final class FileLineIterable impl
     }
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java Sun Mar 24 20:05:50 2013
@@ -42,4 +42,4 @@ public final class SamplingIterable<T> i
     return samplingRate >= 1.0 ? delegate : new SamplingIterable<T>(delegate, samplingRate);
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java Sun Mar 24 20:05:50 2013
@@ -52,4 +52,4 @@ public class StringRecordIterator extend
     return delegate;
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java Sun Mar 24 20:05:50 2013
@@ -105,4 +105,4 @@ public final class SequenceFileIterator<
     }
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java Sun Mar 24 20:05:50 2013
@@ -85,4 +85,4 @@ public final class SequenceFileValueIter
     }
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Sun Mar 24 20:05:50 2013
@@ -27,8 +27,8 @@ public final class AnalyzerUtils {
   }
 
   /**
-   * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in parameters
-   * to your constructor, you will need to wrap it in an implementation that does not take any arguments
+   * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in
+   * parameters to your constructor, you will need to wrap it in an implementation that does not take any arguments
    * @param analyzerClassName - Lucene Analyzer Name
    * @return {@link Analyzer}
    * @throws ClassNotFoundException - {@link ClassNotFoundException}
@@ -45,16 +45,16 @@ public final class AnalyzerUtils {
   }
 
   /**
-   * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in parameters
-   * to your constructor, you will need to wrap it in an implementation that does not take any arguments
+   * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in
+   * parameters to your constructor, you will need to wrap it in an implementation that does not take any arguments
    * @param analyzerClass The Analyzer Class to instantiate
    * @return {@link Analyzer}
    */
-  public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass){
+  public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass) {
     return createAnalyzer(analyzerClass, Version.LUCENE_41);
   }
 
-  public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version){
+  public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {
     Analyzer analyzer;
     if (analyzerClass == StandardAnalyzer.class) {
       Class<?>[] params = new Class<?>[1];

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java Sun Mar 24 20:05:50 2013
@@ -26,8 +26,9 @@ import java.io.IOException;
 /**
  * Provide an Iterator for the tokens in a TokenStream.
  *
- * Note, it is the responsibility of the instantiating class to properly consume the {@link org.apache.lucene.analysis.TokenStream}.  See
- * the Lucene {@link org.apache.lucene.analysis.TokenStream} documentation for more information.
+ * Note, it is the responsibility of the instantiating class to properly consume the
+ * {@link org.apache.lucene.analysis.TokenStream}.  See the Lucene {@link org.apache.lucene.analysis.TokenStream}
+ * documentation for more information.
  */
 //TODO: consider using the char/byte arrays instead of strings, esp. when we upgrade to Lucene 4.0
 public final class TokenStreamIterator extends AbstractIterator<String> {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java Sun Mar 24 20:05:50 2013
@@ -31,4 +31,4 @@ public class MergeVectorsCombiner
       throws IOException, InterruptedException {
     ctx.write(key, VectorWritable.merge(vectors.iterator()));
   }
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java Sun Mar 24 20:05:50 2013
@@ -40,4 +40,4 @@ public class TransposeMapper extends Map
       ctx.write(r, new VectorWritable(tmp));
     }
   }
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java Sun Mar 24 20:05:50 2013
@@ -117,4 +117,4 @@ public abstract class AbstractParameter<
     }
   }
   
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java Sun Mar 24 20:05:50 2013
@@ -59,4 +59,4 @@ public interface Parameter<T> extends Pa
   
   /** @return value used if not set by consumer */
   String defaultValue();
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Sun Mar 24 20:05:50 2013
@@ -195,7 +195,8 @@ public final class MahoutDriver {
     programDriver.driver(argsList.toArray(new String[argsList.size()]));
 
     if (log.isInfoEnabled()) {
-      log.info("Program took {} ms (Minutes: {})", System.currentTimeMillis() - start, (System.currentTimeMillis() - start)/60000.0);
+      log.info("Program took {} ms (Minutes: {})", System.currentTimeMillis() - start,
+          (System.currentTimeMillis() - start) / 60000.0);
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java Sun Mar 24 20:05:50 2013
@@ -23,4 +23,4 @@
  * mapping is useful for values that must stay within a range but whose distribution is roughly exponential near
  * geometric mean of the end-points.  An identity mapping is also supplied.</p>
  */
-package org.apache.mahout.ep;
\ No newline at end of file
+package org.apache.mahout.ep;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Sun Mar 24 20:05:50 2013
@@ -55,7 +55,7 @@ public final class ParallelFPGrowthReduc
   private int maxPerGroup;
   private boolean useFP2;
 
-  private static class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
+  private static final class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
     private final Iterator<Pair<IntArrayList,Long>> innerIter;
 
     private IteratorAdapter(Iterator<Pair<IntArrayList,Long>> transactionIter) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Sun Mar 24 20:05:50 2013
@@ -34,7 +34,7 @@ public class TransactionIterator<T> exte
 
   private final int[] transactionBuffer;
   private final Iterator<Pair<int[],Long>> delegate;
-  
+
   public TransactionIterator(Iterator<Pair<List<T>,Long>> transactions, final Map<T,Integer> attributeIdMapping) {
     transactionBuffer = new int[attributeIdMapping.size()];
     delegate = Iterators.transform(
@@ -43,8 +43,8 @@ public class TransactionIterator<T> exte
           @Override
           public Pair<int[],Long> apply(Pair<List<T>,Long> from) {
             if (from == null) {
-	      return null;
-	    }
+        return null;
+      }
             int index = 0;
             for (T attribute : from.getFirst()) {
               if (attributeIdMapping.containsKey(attribute)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Sun Mar 24 20:05:50 2013
@@ -373,4 +373,4 @@ public final class FPTree {
     return sb.toString();
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java Sun Mar 24 20:05:50 2013
@@ -46,4 +46,4 @@
  * increases the memory consumption but might improve speed until a certain point. This depends entirely on
  * the dataset in question. A value of 5-10 is recommended for mining up to top 100 patterns for each feature.</p>
  */
-package org.apache.mahout.fpm.pfpgrowth;
\ No newline at end of file
+package org.apache.mahout.fpm.pfpgrowth;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java Sun Mar 24 20:05:50 2013
@@ -83,4 +83,4 @@ public class VarIntWritable implements W
     value = Varint.readSignedVarInt(in);
   }
 
-}
\ No newline at end of file
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java Sun Mar 24 20:05:50 2013
@@ -80,4 +80,4 @@ public class VarLongWritable implements 
     value = Varint.readSignedVarLong(in);
   }
 
-}
\ No newline at end of file
+}