You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2013/03/13 00:10:16 UTC

svn commit: r1455748 [1/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/...

Author: srowen
Date: Tue Mar 12 23:10:14 2013
New Revision: 1455748

URL: http://svn.apache.org/r1455748
Log:
More changes from code inspection / findbugs / pmd

Added:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
Removed:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html
    mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html
    mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package.html
    mahout/trunk/math/src/main/java/org/apache/mahout/math/package.html
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/random/SyntheticVariable.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/solver/ConjugateGradientSolver.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedSparseBinaryMatrixTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/QRDecompositionTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Tue Mar 12 23:10:14 2013
@@ -202,10 +202,9 @@ public class ParallelALSFactorizationJob
 
   static class ItemRatingVectorsMapper extends Mapper<LongWritable,Text,IntWritable,VectorWritable> {
 
-    private IntWritable itemIDWritable = new IntWritable();
-    private VectorWritable ratingsWritable = new VectorWritable(true);
-
-    private Vector ratings = new SequentialAccessSparseVector(Integer.MAX_VALUE, 1);
+    private final IntWritable itemIDWritable = new IntWritable();
+    private final VectorWritable ratingsWritable = new VectorWritable(true);
+    private final Vector ratings = new SequentialAccessSparseVector(Integer.MAX_VALUE, 1);
 
     @Override
     protected void map(LongWritable offset, Text line, Context ctx) throws IOException, InterruptedException {
@@ -251,7 +250,7 @@ public class ParallelALSFactorizationJob
     private int numFeatures;
     private OpenIntObjectHashMap<Vector> UorM;
 
-    private VectorWritable uiOrmjWritable = new VectorWritable();
+    private final VectorWritable uiOrmjWritable = new VectorWritable();
 
     @Override
     protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
@@ -287,7 +286,7 @@ public class ParallelALSFactorizationJob
 
     private ImplicitFeedbackAlternatingLeastSquaresSolver solver;
 
-    private VectorWritable uiOrmjWritable = new VectorWritable();
+    private final VectorWritable uiOrmjWritable = new VectorWritable();
 
     @Override
     protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
@@ -316,9 +315,9 @@ public class ParallelALSFactorizationJob
 
   static class AverageRatingMapper extends Mapper<IntWritable,VectorWritable,IntWritable,VectorWritable> {
 
-    private IntWritable firstIndex = new IntWritable(0);
-    private Vector featureVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
-    private VectorWritable featureVectorWritable = new VectorWritable();
+    private final IntWritable firstIndex = new IntWritable(0);
+    private final Vector featureVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
+    private final VectorWritable featureVectorWritable = new VectorWritable();
 
     @Override
     protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Tue Mar 12 23:10:14 2013
@@ -107,7 +107,7 @@ public class RecommenderJob extends Abst
     private int recommendationsPerUser;
     private float maxRating;
 
-    private RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
+    private final RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
 
     @Override
     protected void setup(Context ctx) throws IOException, InterruptedException {
@@ -123,7 +123,7 @@ public class RecommenderJob extends Abst
     }
 
     // we can use a simple dot product computation, as both vectors are dense
-    private double dot(Vector x, Vector y) {
+    private static double dot(Vector x, Vector y) {
       int numFeatures = x.size();
       double sum = 0;
       for (int n = 0; n < numFeatures; n++) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java Tue Mar 12 23:10:14 2013
@@ -40,7 +40,7 @@ public class TopItemQueue extends Priori
     while (size() > 0) {
       MutableRecommendedItem topItem = pop();
       // filter out "sentinel" objects necessary for maintaining an efficient priority queue
-      if (topItem.getItemID() != TopItemQueue.SENTINEL_ID) {
+      if (topItem.getItemID() != SENTINEL_ID) {
         recommendedItems.add(topItem);
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java Tue Mar 12 23:10:14 2013
@@ -242,7 +242,7 @@ public final class ImplicitLinearRegress
     /* calculating cosine similarity to determine when to stop the algorithm, this could be used to detect convergence */
     double cosine = aTb / (Math.sqrt(normA) * Math.sqrt(normB));
     if (Double.isNaN(cosine)) {
-      log.info("Cosine similarity is NaN, recomputeUserFeatures=" + recomputeUserFeatures + " id=" + id);
+      log.info("Cosine similarity is NaN, recomputeUserFeatures={} id={}", recomputeUserFeatures, id);
     } else {
       avrChange.addDatum(cosine);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java Tue Mar 12 23:10:14 2013
@@ -28,17 +28,17 @@ import com.google.common.base.Preconditi
  * Defines the interface for classifiers that take a vector as input. This is
  * implemented as an abstract class so that it can implement a number of handy
  * convenience methods related to classification of vectors.
- * 
+ *
  * <p>
  * A classifier takes an input vector and calculates the scores (usually
- * probabilities) that the input vector belongs to one of <code>n</code>
- * categories. In <code>AbstractVectorClassifier</code> each category is denoted
- * by an integer <code>c</code> between <code>0</code> and <code>n-1</code>
+ * probabilities) that the input vector belongs to one of {@code n}
+ * categories. In {@code AbstractVectorClassifier} each category is denoted
+ * by an integer {@code c} between {@code 0} and {@code n-1}
  * (inclusive).
- * 
+ *
  * <p>
  * New users should start by looking at {@link #classifyFull} (not {@link #classify}).
- * 
+ *
  */
 public abstract class AbstractVectorClassifier {
 
@@ -46,26 +46,26 @@ public abstract class AbstractVectorClas
   public static final double MIN_LOG_LIKELIHOOD = -100.0;
 
    /**
-   * Returns the number of categories that a target variable can be assigned to.
-   * A vector classifier will encode it's output as an integer from
-   * <code>0</code> to <code>numCategories()-1</code> (inclusive).
-   * 
-   * @return The number of categories.
-   */
+    * Returns the number of categories that a target variable can be assigned to.
+    * A vector classifier will encode its output as an integer from
+    * {@code 0} to {@code numCategories()-1} (inclusive).
+    *
+    * @return The number of categories.
+    */
   public abstract int numCategories();
 
   /**
-   * Compute and return a vector containing <code>n-1</code> scores, where
-   * <code>n</code> is equal to <code>numCategories()</code>, given an input
-   * vector <code>instance</code>. Higher scores indicate that the input vector
+   * Compute and return a vector containing {@code n-1} scores, where
+   * {@code n} is equal to {@code numCategories()}, given an input
+   * vector {@code instance}. Higher scores indicate that the input vector
    * is more likely to belong to that category. The categories are denoted by
-   * the integers <code>0</code> through <code>n-1</code> (inclusive), and the
+   * the integers {@code 0} through {@code n-1} (inclusive), and the
    * scores in the returned vector correspond to categories 1 through
-   * <code>n-1</code> (leaving out category 0). It is assumed that the score for
+   * {@code n-1} (leaving out category 0). It is assumed that the score for
    * category 0 is one minus the sum of the scores in the returned vector.
-   * 
+   *
    * @param instance  A feature vector to be classified.
-   * @return A vector of probabilities in 1 of <code>n-1</code> encoding.
+   * @return A vector of probabilities in 1 of {@code n-1} encoding.
    */
   public abstract Vector classify(Vector instance);
   
@@ -75,7 +75,7 @@ public abstract class AbstractVectorClas
    * is just the linear part of the classification.
    * 
    * <p>
-   * The implementation of this method provided by {@link AbstractVectorClassifier} throws an
+   * The implementation of this method provided by {@code AbstractVectorClassifier} throws an
    * {@link UnsupportedOperationException}. Your subclass must explicitly override this method to support
    * this operation.
    * 
@@ -100,33 +100,33 @@ public abstract class AbstractVectorClas
   public abstract double classifyScalar(Vector instance);
 
   /**
-   * Computes and returns a vector containing <code>n</code> scores, where
-   * <code>n</code> is <code>numCategories()</code>, given an input vector
-   * <code>instance</code>. Higher scores indicate that the input vector is more
+   * Computes and returns a vector containing {@code n} scores, where
+   * {@code n} is {@code numCategories()}, given an input vector
+   * {@code instance}. Higher scores indicate that the input vector is more
    * likely to belong to the corresponding category. The categories are denoted
-   * by the integers <code>0</code> through <code>n-1</code> (inclusive).
-   * 
+   * by the integers {@code 0} through {@code n-1} (inclusive).
+   *
    * <p>
    * Using this method it is possible to classify an input vector, for example,
    * by selecting the category with the largest score. If
-   * <code>classifier</code> is an instance of
-   * <code>AbstractVectorClassifier</code> and <code>input</code> is a
-   * <code>Vector</code> of features describing an element to be classified,
-   * then the following code could be used to classify <code>input</code>.<br>
-   * <code>
+   * {@code classifier} is an instance of
+   * {@code AbstractVectorClassifier} and {@code input} is a
+   * {@code Vector} of features describing an element to be classified,
+   * then the following code could be used to classify {@code input}.<br>
+   * {@code
    * Vector scores = classifier.classifyFull(input);<br>
    * int assignedCategory = scores.maxValueIndex();<br>
-   * </code> Here <code>assignedCategory</code> is the index of the category
+   * } Here {@code assignedCategory} is the index of the category
    * with the maximum score.
-   * 
+   *
    * <p>
-   * If an <code>n-1</code> encoding is acceptable, and allocation performance
+   * If an {@code n-1} encoding is acceptable, and allocation performance
    * is an issue, then the {@link #classify(Vector)} method is probably better
    * to use.
-   * 
+   *
    * @see #classify(Vector)
    * @see #classifyFull(Vector r, Vector instance)
-   * 
+   *
    * @param instance A vector of features to be classified.
    * @return A vector of probabilities, one for each category.
    */
@@ -135,29 +135,29 @@ public abstract class AbstractVectorClas
   }
 
   /**
-   * Computes and returns a vector containing <code>n</code> scores, where
-   * <code>n</code> is <code>numCategories()</code>, given an input vector
-   * <code>instance</code>. Higher scores indicate that the input vector is more
+   * Computes and returns a vector containing {@code n} scores, where
+   * {@code n} is {@code numCategories()}, given an input vector
+   * {@code instance}. Higher scores indicate that the input vector is more
    * likely to belong to the corresponding category. The categories are denoted
-   * by the integers <code>0</code> through <code>n-1</code> (inclusive). The
+   * by the integers {@code 0} through {@code n-1} (inclusive). The
    * main difference between this method and {@link #classifyFull(Vector)} is
    * that this method allows a user to provide a previously allocated
-   * <code>Vector r</code> to store the returned scores.
-   * 
+   * {@code Vector r} to store the returned scores.
+   *
    * <p>
    * Using this method it is possible to classify an input vector, for example,
    * by selecting the category with the largest score. If
-   * <code>classifier</code> is an instance of
-   * <code>AbstractVectorClassifier</code>, <code>result</code> is a non-null
-   * <code>Vector</code>, and <code>input</code> is a <code>Vector</code> of
+   * {@code classifier} is an instance of
+   * {@code AbstractVectorClassifier}, {@code result} is a non-null
+   * {@code Vector}, and {@code input} is a {@code Vector} of
    * features describing an element to be classified, then the following code
-   * could be used to classify <code>input</code>.<br>
-   * <code>
+   * could be used to classify {@code input}.<br>
+   * {@code
    * Vector scores = classifier.classifyFull(result, input); // Notice that scores == result<br>
    * int assignedCategory = scores.maxValueIndex();<br>
-   * </code> Here <code>assignedCategory</code> is the index of the category
+   * } Here {@code assignedCategory} is the index of the category
    * with the maximum score.
-   * 
+   *
    * @param r Where to put the results.
    * @param instance  A vector of features to be classified.
    * @return A vector of scores/probabilities, one for each category.
@@ -171,10 +171,10 @@ public abstract class AbstractVectorClas
 
   /**
    * Returns n-1 probabilities, one for each categories 1 through
-   * <code>n-1</code>, for each row of a matrix, where <code>n</code> is equal
-   * to <code>numCategories()</code>. The probability of the missing 0-th
+   * {@code n-1}, for each row of a matrix, where {@code n} is equal
+   * to {@code numCategories()}. The probability of the missing 0-th
    * category is 1 - rowSum(this result).
-   * 
+   *
    * @param data  The matrix whose rows are the input vectors to classify
    * @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
    */
@@ -187,8 +187,8 @@ public abstract class AbstractVectorClas
   }
 
   /**
-   * Returns a matrix where the rows of the matrix each contain <code>n</code> probabilities, one for each category.
-   * 
+   * Returns a matrix where the rows of the matrix each contain {@code n} probabilities, one for each category.
+   *
    * @param data  The matrix whose rows are the input vectors to classify
    * @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
    */

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java Tue Mar 12 23:10:14 2013
@@ -145,8 +145,7 @@ public class DecisionTreeBuilder impleme
 
       // variance is compared with minimum variance
       if ((var / data.size()) < minVariance) {
-        log.debug("variance(" + (var / data.size()) + ") < minVariance(" + minVariance + ") Leaf(" +
-            (sum / data.size()) + ')');
+        log.debug("variance({}) < minVariance({}) Leaf({})", var / data.size(), minVariance, sum / data.size());
         return new Leaf(sum / data.size());
       }
     } else {
@@ -210,8 +209,7 @@ public class DecisionTreeBuilder impleme
       return new Leaf(label);
     }
 
-    log.debug("best split attr:" + best.getAttr() + ", split:" + best.getSplit() + ", ig:" 
-        + best.getIg());
+    log.debug("best split attr:{}, split:{}, ig:{}", best.getAttr(), best.getSplit(), best.getIg());
 
     boolean alreadySelected = selected[best.getAttr()];
     if (alreadySelected) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Tue Mar 12 23:10:14 2013
@@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector.Ele
 
 /**
  * Class implementing the Naive Bayes Classifier Algorithm. Note that this class
- * supports {@link #classifyFull}, but not <code>classify</code> or
- * <code>classifyScalar</code>. The reason that these two methods are not
+ * supports {@link #classifyFull}, but not {@code classify} or
+ * {@code classifyScalar}. The reason that these two methods are not
  * supported is because the scores computed by a NaiveBayesClassifier do not
  * represent probabilities.
  */

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java Tue Mar 12 23:10:14 2013
@@ -140,7 +140,7 @@ public class CrossFoldLearner extends Ab
     }
   }
 
-  private long mod(long x, int y) {
+  private static long mod(long x, int y) {
     long r = x % y;
     return r < 0 ? r + y : r;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Tue Mar 12 23:10:14 2013
@@ -343,7 +343,7 @@ public class CVB0Driver extends Abstract
   private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
     throws IOException, ClassNotFoundException, InterruptedException {
     String jobName = "Calculating perplexity for " + modelPath;
-    log.info("About to run: " + jobName);
+    log.info("About to run: {}", jobName);
     Job job = new Job(conf, jobName);
     job.setJarByClass(CachingCVB0PerplexityMapper.class);
     job.setMapperClass(CachingCVB0PerplexityMapper.class);
@@ -423,7 +423,7 @@ public class CVB0Driver extends Abstract
   private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
     throws IOException, InterruptedException, ClassNotFoundException {
     String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
-    log.info("About to run: " + jobName);
+    log.info("About to run: {}", jobName);
     Job job = new Job(conf, jobName);
     job.setJarByClass(CVB0Driver.class);
     job.setInputFormatClass(SequenceFileInputFormat.class);
@@ -441,7 +441,7 @@ public class CVB0Driver extends Abstract
   private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
     throws IOException, ClassNotFoundException, InterruptedException {
     String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
-    log.info("About to run: " + jobName);
+    log.info("About to run: {}", jobName);
     Job job = new Job(conf, jobName);
     job.setMapperClass(CVB0DocInferenceMapper.class);
     job.setNumReduceTasks(0);
@@ -483,7 +483,7 @@ public class CVB0Driver extends Abstract
     int iterationNumber = 1;
     Path iterationPath = modelPath(modelTempDir, iterationNumber);
     while (fs.exists(iterationPath) && iterationNumber <= maxIterations) {
-      log.info("Found previous state: " + iterationPath);
+      log.info("Found previous state: {}", iterationPath);
       iterationNumber++;
       iterationPath = modelPath(modelTempDir, iterationNumber);
     }
@@ -495,7 +495,7 @@ public class CVB0Driver extends Abstract
     throws IOException, ClassNotFoundException, InterruptedException {
     String jobName = String.format("Iteration %d of %d, input path: %s",
         iterationNumber, maxIterations, modelInput);
-    log.info("About to run: " + jobName);
+    log.info("About to run: {}", jobName);
     Job job = new Job(conf, jobName);
     job.setJarByClass(CVB0Driver.class);
     job.setMapperClass(CachingCVB0Mapper.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Tue Mar 12 23:10:14 2013
@@ -225,12 +225,12 @@ public class InMemoryCollapsedVariationa
     while (iter < minIter) {
       trainDocuments(testFraction);
       if (verbose) {
-        log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
+        log.info("model after: {}: {}", iter, modelTrainer.getReadModel());
       }
-      log.info("iteration " + iter + " complete");
+      log.info("iteration {} complete", iter);
       oldPerplexity = modelTrainer.calculatePerplexity(corpusWeights, docTopicCounts,
           testFraction);
-      log.info(oldPerplexity + " = perplexity");
+      log.info("{} = perplexity", oldPerplexity);
       iter++;
     }
     double newPerplexity = 0;
@@ -238,14 +238,14 @@ public class InMemoryCollapsedVariationa
     while (iter < maxIterations && fractionalChange > minFractionalErrorChange) {
       trainDocuments();
       if (verbose) {
-        log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
+        log.info("model after: {}: {}", iter, modelTrainer.getReadModel());
       }
       newPerplexity = modelTrainer.calculatePerplexity(corpusWeights, docTopicCounts,
           testFraction);
-      log.info(newPerplexity + " = perplexity");
+      log.info("{} = perplexity", newPerplexity);
       iter++;
       fractionalChange = Math.abs(newPerplexity - oldPerplexity) / oldPerplexity;
-      log.info(fractionalChange + " = fractionalChange");
+      log.info("{} = fractionalChange", fractionalChange);
       oldPerplexity = newPerplexity;
     }
     if (iter < maxIterations) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java Tue Mar 12 23:10:14 2013
@@ -96,7 +96,7 @@ public class ModelTrainer {
   }
 
   public void start() {
-    log.info("Starting training threadpool with " + numTrainThreads + " threads");
+    log.info("Starting training threadpool with {} threads", numTrainThreads);
     workQueue = new ArrayBlockingQueue<Runnable>(numTrainThreads * 10);
     threadPool = new ThreadPoolExecutor(numTrainThreads, numTrainThreads, 0, TimeUnit.SECONDS,
         workQueue);
@@ -169,10 +169,10 @@ public class ModelTrainer {
               (System.nanoTime() - start) /(1.0e6 * document.getNumNondefaultElements());
           if (i % 100 == 0) {
             long time = System.nanoTime() - startTime;
-            log.debug("trained " + i + " documents in " + (time / 1.0e6) + "ms");
+            log.debug("trained {} documents in {}ms", i, time / 1.0e6);
             if (i % 500 == 0) {
               Arrays.sort(times);
-              log.debug("training took median " + times[times.length / 2] + "ms per token-instance");
+              log.debug("training took median {}ms per token-instance", times[times.length / 2]);
             }
           }
         }
@@ -211,7 +211,7 @@ public class ModelTrainer {
             numTopics, numTerms, true), numDocTopicIters));
         return;
       } catch (InterruptedException e) {
-        log.warn("Interrupted waiting to submit document to work queue: " + document, e);
+        log.warn("Interrupted waiting to submit document to work queue: {}", document, e);
       }
     }
   }
@@ -239,11 +239,11 @@ public class ModelTrainer {
         log.warn("Threadpool timed out on await termination - jobs still running!");
       }
       long newTime = System.nanoTime();
-      log.info("threadpool took: " + (newTime - startTime) / 1.0e6 + "ms");
+      log.info("threadpool took: {}ms", (newTime - startTime) / 1.0e6);
       startTime = newTime;
       writeModel.awaitTermination();
       newTime = System.nanoTime();
-      log.info("writeModel.awaitTermination() took " + (newTime - startTime) / 1.0e6 + "ms");
+      log.info("writeModel.awaitTermination() took {}ms", (newTime - startTime) / 1.0e6);
       TopicModel tmpModel = writeModel;
       writeModel = readModel;
       readModel = tmpModel;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Tue Mar 12 23:10:14 2013
@@ -21,11 +21,10 @@ import org.apache.lucene.analysis.standa
 import org.apache.lucene.util.Version;
 import org.apache.mahout.common.ClassUtils;
 
-/**
- *
- *
- **/
-public class AnalyzerUtils {
+public final class AnalyzerUtils {
+
+  private AnalyzerUtils() {
+  }
 
   /**
    * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in parameters

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java Tue Mar 12 23:10:14 2013
@@ -28,7 +28,7 @@ import java.io.IOException;
 public class MergeVectorsReducer extends
     Reducer<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
 
-  private VectorWritable result = new VectorWritable();
+  private final VectorWritable result = new VectorWritable();
 
   @Override
   public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context ctx)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Tue Mar 12 23:10:14 2013
@@ -335,7 +335,7 @@ public final class FPTree {
     int attribute = node.attribute();
     if (items == null) {
       // at root
-      if (!(node == root)) {
+      if (node != root) {
         throw new IllegalStateException();
       }
       items = new IntArrayList();
@@ -352,7 +352,7 @@ public final class FPTree {
       qTree.accumulate(items, toAdd);
       added += toAdd;
     }
-    if (!(node == root)) {
+    if (node != root) {
       int lastIdx = items.size() - 1;
       if (items.get(lastIdx) != attribute) {
         throw new IllegalStateException();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Tue Mar 12 23:10:14 2013
@@ -96,7 +96,7 @@ public class VectorDistanceSimilarityJob
       String maxDistanceArg = getOption(MAX_DISTANCE);
       if (maxDistanceArg != null) {
         maxDistance = Double.parseDouble(maxDistanceArg);
-        Preconditions.checkArgument(maxDistance > 0d, "value for " + MAX_DISTANCE + " must be greater than zero");
+        Preconditions.checkArgument(maxDistance > 0.0d, "value for " + MAX_DISTANCE + " must be greater than zero");
       }
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java Tue Mar 12 23:10:14 2013
@@ -62,7 +62,7 @@ import org.apache.mahout.math.hadoop.sto
  * reducer.
  */
 @SuppressWarnings("deprecation")
-public class ABtDenseOutJob {
+public final class ABtDenseOutJob {
 
   public static final String PROP_BT_PATH = "ssvd.Bt.path";
   public static final String PROP_BT_BROADCAST = "ssvd.Bt.broadcast";

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java Tue Mar 12 23:10:14 2013
@@ -21,7 +21,7 @@ import org.apache.lucene.search.similari
 //TODO: add a new class that supports arbitrary Lucene similarity implementations
 public class TFIDF implements Weight {
 
-  private DefaultSimilarity sim = new DefaultSimilarity();
+  private final DefaultSimilarity sim = new DefaultSimilarity();
 
   public TFIDF() {
   }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java Tue Mar 12 23:10:14 2013
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
 
 import org.apache.mahout.cf.taste.impl.TasteTestCase;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.MahoutTestCase;
 import org.junit.Test;
 
 import java.util.List;
@@ -28,33 +29,33 @@ public class TopItemQueueTest extends Ta
   @Test
   public void topK() {
 
-    float[] ratings = { .5f, .6f, .7f, 2f, 0f };
+    float[] ratings = {0.5f, 0.6f, 0.7f, 2.0f, 0.0f};
 
     List<RecommendedItem> topItems = findTop(ratings, 2);
 
     assertEquals(2, topItems.size());
-    assertEquals(3l, topItems.get(0).getItemID());
-    assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
-    assertEquals(2l, topItems.get(1).getItemID());
-    assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+    assertEquals(3L, topItems.get(0).getItemID());
+    assertEquals(2.0f, topItems.get(0).getValue(), MahoutTestCase.EPSILON);
+    assertEquals(2L, topItems.get(1).getItemID());
+    assertEquals(0.7f, topItems.get(1).getValue(), MahoutTestCase.EPSILON);
   }
 
   @Test
   public void topKInputSmallerThanK() {
 
-    float[] ratings = {.7f, 2f};
+    float[] ratings = {0.7f, 2.0f};
 
     List<RecommendedItem> topItems = findTop(ratings, 3);
 
     assertEquals(2, topItems.size());
-    assertEquals(1l, topItems.get(0).getItemID());
-    assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
-    assertEquals(0l, topItems.get(1).getItemID());
-    assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+    assertEquals(1L, topItems.get(0).getItemID());
+    assertEquals(2.0f, topItems.get(0).getValue(), MahoutTestCase.EPSILON);
+    assertEquals(0L, topItems.get(1).getItemID());
+    assertEquals(0.7f, topItems.get(1).getValue(), MahoutTestCase.EPSILON);
   }
 
 
-  private List<RecommendedItem> findTop(float[] ratings, int k) {
+  private static List<RecommendedItem> findTop(float[] ratings, int k) {
     TopItemQueue queue = new TopItemQueue(k);
 
     for (int item = 0; item < ratings.length; item++) {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java Tue Mar 12 23:10:14 2013
@@ -94,6 +94,7 @@ public final class NodeTest extends Maho
     assertEquals(node, readNode());
   }
 
+  @Test
   public void testCategoricalNode() throws Exception {
 
     Node node = new CategoricalNode(rng.nextInt(), new double[]{rng.nextDouble(),

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java Tue Mar 12 23:10:14 2013
@@ -210,8 +210,8 @@ public final class OnlineLogisticRegress
     // select training and test data
     List<Integer> train = order.subList(0, 100);
     List<Integer> test = order.subList(100, 150);
-    logger.warn("Training set = " + train);
-    logger.warn("Test set = " + test);
+    logger.warn("Training set = {}", train);
+    logger.warn("Test set = {}", test);
 
     // now train many times and collect information on accuracy each time
     int[] correct = new int[test.size() + 1];

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Tue Mar 12 23:10:14 2013
@@ -68,6 +68,7 @@ public final class DummyStatusReporter e
   public void setStatus(String status) {
   }
 
+  @Override
   public float getProgress() {
     return 0.0f;
   }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java Tue Mar 12 23:10:14 2013
@@ -226,7 +226,7 @@ public class TestVectorDistanceSimilarit
     }
   }
 
-  private List<VectorWritable> getPointsWritable(double[][] raw) {
+  private static List<VectorWritable> getPointsWritable(double[][] raw) {
     List<VectorWritable> points = Lists.newArrayList();
     for (double[] fr : raw) {
       Vector vec = new RandomAccessSparseVector(fr.length);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java Tue Mar 12 23:10:14 2013
@@ -59,13 +59,13 @@ public class LocalSSVDSolverDenseTest ex
     runSSVDSolver(1);
   }
 
-  /*
-   * remove from active tests to save time.
-   */
-  /* @Test */
+  // remove from active tests to save time.
+  /* 
+  @Test
   public void testSSVDSolverPowerIterations2() throws IOException {
     runSSVDSolver(2);
   }
+   */
 
   public void runSSVDSolver(int q) throws IOException {
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Tue Mar 12 23:10:14 2013
@@ -56,13 +56,13 @@ public class LocalSSVDSolverSparseSequen
 
   private static final double s_epsilon = 1.0E-10d;
 
-  /*
-   * removing from tests to reduce test running time
-   */
-  /* @Test */
+  // removing from tests to reduce test running time
+  /* 
+  @Test
   public void testSSVDSolverSparse() throws IOException {
     runSSVDSolver(0);
   }
+   */
 
   @Test
   public void testSSVDSolverPowerIterations1() throws IOException {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java Tue Mar 12 23:10:14 2013
@@ -35,7 +35,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.hadoop.stochasticsvd.qr.GramSchmidt;
 
-public class SSVDTestsHelper {
+public final class SSVDTestsHelper {
 
   private SSVDTestsHelper() {
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java Tue Mar 12 23:10:14 2013
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.examp
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class EstimateConverter {
+public final class EstimateConverter {
 
   private static final Logger log = LoggerFactory.getLogger(EstimateConverter.class);
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java Tue Mar 12 23:10:14 2013
@@ -44,7 +44,7 @@ import java.io.OutputStream;
  * needs at least 6-7GB of memory, tested with -Xms6700M -Xmx6700M
  *
  */
-public class Track1SVDRunner {
+public final class Track1SVDRunner {
 
   private static final Logger log = LoggerFactory.getLogger(Track1SVDRunner.class);
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java Tue Mar 12 23:10:14 2013
@@ -356,7 +356,7 @@ public class DisplayMinHash extends Disp
       }
       list.add(next.getSecond().get());
     }
-    log.info("Loaded: " + clusters.size() + " clusters");
+    log.info("Loaded: {} clusters", clusters.size());
   }
 
   private static void runMinHash(Configuration conf, Path samples, Path output)

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Tue Mar 12 23:10:14 2013
@@ -47,7 +47,7 @@ public final class MailArchivesClusterin
   // HTML tags, and Java keywords asmany of the messages in the archives
   // are subversion check-in notifications
     
-private static CharArraySet stopSet = new CharArraySet(LUCENE_VERSION, Arrays.asList(
+  private static final CharArraySet stopSet = new CharArraySet(LUCENE_VERSION, Arrays.asList(
     "3d","7bit","a0","about","above","abstract","across","additional","after",
     "afterwards","again","against","align","all","almost","alone","along",
     "already","also","although","always","am","among","amongst","amoungst",

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Tue Mar 12 23:10:14 2013
@@ -45,7 +45,7 @@ public class WikipediaAnalyzer extends S
     Tokenizer tokenizer = new WikipediaTokenizer(reader);
     TokenStream result = new StandardFilter(Version.LUCENE_41, tokenizer);
     result = new LowerCaseFilter(Version.LUCENE_41, result);
-    result = new StopFilter(Version.LUCENE_41, result, super.getStopwordSet());
+    result = new StopFilter(Version.LUCENE_41, result, getStopwordSet());
     return new TokenStreamComponents(tokenizer, result);
   }
 }
\ No newline at end of file

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Tue Mar 12 23:10:14 2013
@@ -114,7 +114,7 @@ public final class VectorDumper extends 
     boolean sortVectors = hasOption("sortVectors");
     boolean quiet = hasOption("quiet");
     if (!quiet) {
-      log.info("Sort? " + sortVectors);
+      log.info("Sort? {}", sortVectors);
     }
 
     String[] dictionary = null;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Tue Mar 12 23:10:14 2013
@@ -88,8 +88,8 @@ public class ClusterLabels {
   private String idField;
   private final Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
   private String output;
-  private int minNumIds;
-  private int maxLabels;
+  private final int minNumIds;
+  private final int maxLabels;
 
   public ClusterLabels(Path seqFileDir,
                        Path pointsDir,

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Tue Mar 12 23:10:14 2013
@@ -22,7 +22,6 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.utils.vectors.TermInfo;
 import org.apache.mahout.vectorizer.Weight;
 
-import java.io.IOException;
 import java.util.Iterator;
 
 /**
@@ -75,10 +74,6 @@ public final class LuceneIterable implem
 
   @Override
   public Iterator<Vector> iterator() {
-    try {
-      return new LuceneIterator(indexReader, idField, field, terminfo, weight, normPower, maxPercentErrorDocs);
-    } catch (IOException e) {
-      throw new IllegalStateException(e);
-    }
+    return new LuceneIterator(indexReader, idField, field, terminfo, weight, normPower, maxPercentErrorDocs);
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java Tue Mar 12 23:10:14 2013
@@ -58,7 +58,7 @@ public final class LuceneIterator extend
   private final Bump125 bump = new Bump125();
   private long nextLogRecord = bump.increment();
   private int skippedErrorMessages = 0;
-  private Weight weight;
+  private final Weight weight;
 
   /**
    * Produce a LuceneIterable that can create the Vector plus normalize it.
@@ -76,7 +76,7 @@ public final class LuceneIterator extend
                         String field,
                         TermInfo terminfo,
                         Weight weight,
-                        double normPower) throws IOException {
+                        double normPower) {
     this(indexReader, idField, field, terminfo, weight, normPower, 0.0);
   }
 
@@ -97,7 +97,7 @@ public final class LuceneIterator extend
                         TermInfo terminfo,
                         Weight weight,
                         double normPower,
-                        double maxPercentErrorDocs) throws IOException {
+                        double maxPercentErrorDocs) {
     // term docs(null) is a better way of iterating all the docs in Lucene
     Preconditions.checkArgument(normPower == LuceneIterable.NO_NORMALIZING || normPower >= 0,
             "If specified normPower must be nonnegative", normPower);

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Mar 12 23:10:14 2013
@@ -24,8 +24,6 @@ import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -45,18 +43,12 @@ import org.apache.mahout.common.distance
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
 import org.apache.mahout.common.kernel.IKernelProfile;
 import org.apache.mahout.common.kernel.TriangularKernelProfile;
-import org.apache.mahout.math.DenseMatrix;
-import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.hadoop.DistributedRowMatrix;
-import org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver;
-import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
 import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.apache.mahout.utils.vectors.TermEntry;
 import org.apache.mahout.utils.vectors.TermInfo;
@@ -287,7 +279,8 @@ public final class TestClusterDumper ext
     clusterDumper.printClusters(termDictionary);
   }
   
-  // @Test
+  /*
+  @Test
   public void testKmeansSVD() throws Exception {
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     Path output = getTestTempDirPath("output");
@@ -351,7 +344,7 @@ public final class TestClusterDumper ext
     clusterDumper.printClusters(termDictionary);
   }
   
-  // @Test
+  @Test
   public void testKmeansDSVD() throws Exception {
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     Path output = getTestTempDirPath("output");
@@ -393,7 +386,7 @@ public final class TestClusterDumper ext
     clusterDumper.printClusters(termDictionary);
   }
   
-  // @Test
+  @Test
   public void testKmeansDSVD2() throws Exception {
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     Path output = getTestTempDirPath("output");
@@ -437,4 +430,5 @@ public final class TestClusterDumper ext
     		kmeansOutput, 10), new Path(kmeansOutput, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
   }
+   */
 }

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java Tue Mar 12 23:10:14 2013
@@ -19,12 +19,11 @@ package org.apache.mahout.utils.vectors.
 import org.apache.mahout.utils.MahoutTestCase;
 import org.junit.Test;
 
-public class ARFFTypeTest extends MahoutTestCase{
+public final class ARFFTypeTest extends MahoutTestCase {
 
   @Test
   public void removeQuotes() {
-    
-    assertEquals(null, ARFFType.removeQuotes(null));
+    assertNull(ARFFType.removeQuotes(null));
     assertEquals("", ARFFType.removeQuotes("\"\""));
     assertEquals("", ARFFType.removeQuotes("''"));
     assertEquals("", ARFFType.removeQuotes(""));

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java Tue Mar 12 23:10:14 2013
@@ -22,7 +22,7 @@ package org.apache.mahout.math;
  *  <p>
  *  See also http://github.com/yonik/java_util for future updates to this file.
  */
-public class MurmurHash3 {
+public final class MurmurHash3 {
 
   private MurmurHash3() {
   }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java Tue Mar 12 23:10:14 2013
@@ -132,6 +132,7 @@ public class OldQRDecomposition implemen
    *
    * @return <tt>Q</tt>
    */
+  @Override
   public Matrix getQ() {
     int columns = Math.min(originalColumns, originalRows);
     Matrix q = qr.like(originalRows, columns);
@@ -154,6 +155,7 @@ public class OldQRDecomposition implemen
    *
    * @return <tt>R</tt>
    */
+  @Override
   public Matrix getR() {
     int rows = Math.min(originalRows, originalColumns);
     Matrix r = qr.like(rows, originalColumns);
@@ -176,6 +178,7 @@ public class OldQRDecomposition implemen
    *
    * @return true if <tt>R</tt>, and hence <tt>A</tt>, has full rank.
    */
+  @Override
   public boolean hasFullRank() {
     for (int j = 0; j < originalColumns; j++) {
       if (rDiag.getQuick(j) == 0) {
@@ -192,6 +195,7 @@ public class OldQRDecomposition implemen
    * @return <tt>X</tt> that minimizes the two norm of <tt>Q*R*X - B</tt>.
    * @throws IllegalArgumentException if <tt>B.rows() != A.rows()</tt>.
    */
+  @Override
   public Matrix solve(Matrix B) {
     if (B.numRows() != originalRows) {
       throw new IllegalArgumentException("Matrix row dimensions must agree.");

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java Tue Mar 12 23:10:14 2013
@@ -41,7 +41,8 @@ import java.util.Locale;
  */
 
 public class QRDecomposition implements QR {
-  private final Matrix q, r;
+  private final Matrix q;
+  private final Matrix r;
   private final boolean fullRank;
   private final int rows;
   private final int columns;

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java?rev=1455748&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java Tue Mar 12 23:10:14 2013
@@ -0,0 +1,143 @@
+/**
+ * <HTML>
+ * <BODY>
+ * Resizable lists holding objects or primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc. For non-resizable lists (1-dimensional matrices) see
+ * package <code>org.apache.mahout.math.matrix</code>.<p></p>
+ * <h1><a name="Overview"></a>Getting Started</h1>
+ * <h2>1. Overview</h2>
+ * <p>The list package offers flexible object oriented abstractions modelling dynamically
+ * resizing lists holding objects or primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc. It is designed to be scalable in terms of performance
+ * and memory requirements.</p>
+ * <p>Features include: </p>
+ * <p></p>
+ * <ul>
+ * <li>Lists operating on objects as well as all primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc.
+ * </li>
+ * <li>Compact representations</li>
+ * <li>A number of general purpose list operations including: adding, inserting,
+ * removing, iterating, searching, sorting, extracting ranges and copying. All
+ * operations are designed to perform well on mass data.
+ * </li>
+ * <li>Support for quick access to list elements. This is achieved by bounds-checking
+ * and non-bounds-checking accessor methods as well as zero-copy transformations
+ * to primitive arrays such as <tt>int[]</tt>, <tt>double[]</tt>, etc.
+ * </li>
+ * <li>Allows to use high level algorithms on primitive data types without any
+ * space and time overhead. Operations on primitive arrays, Colt lists and JAL
+ * algorithms can freely be mixed at zero copy overhead.
+ * </li>
+ * </ul>
+ * <p>File-based I/O can be achieved through the standard Java built-in serialization
+ * mechanism. All classes implement the {@link java.io.Serializable} interface.
+ * However, the toolkit is entirely decoupled from advanced I/O. It provides data
+ * structures and algorithms only.
+ * <p> This toolkit borrows concepts and terminology from the Javasoft <a
+ * href="http://www.javasoft.com/products/jdk/1.2/docs/guide/collections/index.html">
+ * Collections framework</a> written by Josh Bloch and introduced in JDK 1.2.
+ * <h2>2. Introduction</h2>
+ * <p>Lists are fundamental to virtually any application. Large scale resizable lists
+ * are, for example, used in scientific computations, simulations, database management
+ * systems, to name just a few.</p>
+ * <h2></h2>
+ * <p>A list is a container holding elements that can be accessed via zero-based
+ * indexes. Lists may be implemented in different ways (most commonly with arrays).
+ * A resizable list automatically grows as elements are added. The lists of this
+ * package do not automatically shrink. Shrinking needs to be triggered by explicitly
+ * calling <tt>trimToSize()</tt> methods.</p>
+ * <p><i>Growing policy</i>: A list implemented with arrays initially has a certain
+ * <tt>initialCapacity</tt> - per default 10 elements, but customizable upon instance
+ * construction. As elements are added, this capacity may no longer be sufficient.
+ * When a list is automatically grown, its capacity is expanded to <tt>1.5*currentCapacity</tt>.
+ * Thus, excessive resizing (involving copying) is avoided.</p>
+ * <h4>Copying</h4>
+ * <p>
+ * <p>Any list can be copied. A copy is <i>equal</i> to the original but entirely
+ * independent of the original. So changes in the copy are not reflected in the
+ * original, and vice-versa.
+ * <h2>3. Organization of this package</h2>
+ * <p>Class naming follows the schema <tt>&lt;ElementType&gt;&lt;ImplementationTechnique&gt;List</tt>.
+ * For example, we have a {@link org.apache.mahout.math.list.DoubleArrayList}, which is a list
+ * holding <tt>double</tt> elements implemented with <tt>double</tt>[] arrays.
+ * </p>
+ * <p>The classes for lists of a given value type are derived from a common abstract
+ * base class tagged <tt>Abstract&lt;ElementType&gt;</tt><tt>List</tt>. For example,
+ * all lists operating on <tt>double</tt> elements are derived from
+ * {@link org.apache.mahout.math.list.AbstractDoubleList},
+ * which in turn is derived from an abstract base class tying together all lists
+ * regardless of value type, {@link org.apache.mahout.math.list.AbstractList}. The abstract
+ * base classes provide skeleton implementations for all but few methods. Experimental
+ * data layouts (such as compressed, sparse, linked, etc.) can easily be implemented
+ * and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree
+ * view</a> to get the broad picture.</p>
+ * <h2>4. Example usage</h2>
+ * <p>The following snippet fills a list, randomizes it, extracts the first half
+ * of the elements, sums them up and prints the result. It is implemented entirely
+ * with accessor methods.</p>
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>AbstractDoubleList list = new DoubleArrayList();
+ * for (int i=0; i&lt;s; i++) { list.add((double)i); }
+ * list.shuffle();
+ * AbstractDoubleList part = list.partFromTo(0,list.size()/2 - 1);
+ * double sum = 0.0;
+ * for (int i=0; i&lt;part.size(); i++) { sum += part.get(i); }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p> For efficiency, all classes provide back doors to enable getting/setting the
+ * backing array directly. In this way, the high level operations of these classes
+ * can be used where appropriate, and one can switch to <tt>[]</tt>-array index
+ * notations where necessary. The key methods for this are <tt>public &lt;ElementType&gt;[]
+ * elements()</tt> and <tt>public void elements(&lt;ElementType&gt;[])</tt>. The
+ * former trustingly returns the array it internally keeps to store the elements.
+ * Holding this array in hand, we can use the <tt>[]</tt>-array operator to
+ * perform iteration over large lists without needing to copy the array or paying
+ * the performance penalty introduced by accessor methods. Alternatively any JAL
+ * algorithm (or other algorithm) can operate on the returned primitive array.
+ * The latter method forces a list to internally hold a user provided array. Using
+ * this approach one can avoid needing to copy the elements into the list.
+ * <p>As a consequence, operations on primitive arrays, Colt lists and JAL algorithms
+ * can freely be mixed at zero-copy overhead.
+ * <p> Note that such special treatment certainly breaks encapsulation. This functionality
+ * is provided for performance reasons only and should only be used when absolutely
+ * necessary. Here is the above example in mixed notation:
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>DoubleArrayList list = new DoubleArrayList(s); // list.size()==0, capacity==s
+ * list.setSize(s); // list.size()==s<br>double[] values = list.elements(); // zero copy, values.length==s<br>for (int i=0; i&lt;s; i++) { values[i]=(double)i; }
+ * list.shuffle();
+ * double sum = 0.0;
+ * int limit = values.length/2;
+ * for (int i=0; i&lt;limit; i++) { sum += values[i]; }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p> Or even more compact using lists as algorithm objects:
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>double[] values = new double[s];
+ * for (int i=0; i&lt;s; i++) { values[i]=(double)i; }
+ * new DoubleArrayList(values).shuffle(); // zero-copy, shuffle via back door
+ * double sum = 0.0;
+ * int limit = values.length/2;
+ * for (int i=0; i&lt;limit; i++) { sum += values[i]; }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p>
+ * <h2>5. Notes </h2>
+ * <p>The quicksorts and mergesorts are the JDK 1.2 V1.26 algorithms, modified as
+ * necessary to operate on the given data types.
+ * </BODY>
+ * </HTML>
+ */
+package org.apache.mahout.math.list;
\ No newline at end of file

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java Tue Mar 12 23:10:14 2013
@@ -35,7 +35,7 @@ import java.util.Arrays;
  * Memory requirements: 1 KB static memory.
  *
  */
-public class PrimeFinder {
+public final class PrimeFinder {
 
   /** The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>. */
   public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java?rev=1455748&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java Tue Mar 12 23:10:14 2013
@@ -0,0 +1,248 @@
+/**
+ * <HTML>
+ * <BODY>
+ * Automatically growing and shrinking maps holding objects or primitive
+ * data types such as <tt>int</tt>, <tt>double</tt>, etc. Currently all maps are
+ * based upon hashing.
+ * <h2><a name="Overview"></a>1. Overview</h2>
+ * <p>The map package offers flexible object oriented abstractions modelling automatically
+ * resizing maps. It is designed to be scalable in terms of performance and memory
+ * requirements.</p>
+ * <p>Features include: </p>
+ * <p></p>
+ * <ul>
+ * <li>Maps operating on objects as well as all primitive data types such as <code>int</code>,
+ * <code>double</code>, etc.
+ * </li>
+ * <li>Compact representations</li>
+ * <li>Support for quick access to associations</li>
+ * <li>A number of general purpose map operations</li>
+ * </ul>
+ * <p>File-based I/O can be achieved through the standard Java built-in serialization
+ * mechanism. All classes implement the {@link java.io.Serializable} interface.
+ * However, the toolkit is entirely decoupled from advanced I/O. It provides data
+ * structures and algorithms only.
+ * <p> This toolkit borrows some terminology from the Javasoft <a
+ * href="http://www.javasoft.com/products/jdk/1.2/docs/guide/collections/index.html">
+ * Collections framework</a> written by Josh Bloch and introduced in JDK 1.2.
+ * <h2>2. Introduction</h2>
+ * <p>A map is an associative container that manages a set of (key,value) pairs.
+ * It is useful for implementing a collection of one-to-one mappings. A (key,value)
+ * pair is called an <i>association</i>. A value can be looked up via its key.
+ * Associations can quickly be set, removed and retrieved. They are stored in a
+ * hashing structure based on the hash code of their keys, which is obtained by
+ * using a hash function. </p>
+ * <p> A map can, for example, contain <tt>Name-->Location</tt> associations like
+ * <tt>{("Pete", "Geneva"), ("Steve", "Paris"), ("Robert", "New York")}</tt> used
+ * in address books or <tt>Index-->Value</tt> mappings like <tt>{(0, 100), (3,
+ * 1000), (100000, 70)}</tt> representing sparse lists or matrices. For example
+ * this could mean at index 0 we have a value of 100, at index 3 we have a value
+ * of 1000, at index 100000 we have a value of 70, and at all other indexes we
+ * have a value of, say, zero. Another example is a map of IP addresses to domain
+ * names (DNS). Maps can also be useful to represent<i> multi sets</i>, that is,
+ * sets where elements can occur more than once. For multi sets one would have
+ * <tt>Value-->Frequency</tt> mappings like <tt>{(100, 1), (50, 1000), (101, 3)}</tt>
+ * meaning element 100 occurs 1 time, element 50 occurs 1000 times, element 101
+ * occurs 3 times. Further, maps can also manage <tt>ObjectIdentifier-->Object</tt>
+ * mappings like <tt>{(12, obj1), (7, obj2), (10000, obj3), (9, obj4)}</tt> used
+ * in Object Databases.
+ * <p> A map cannot contain two or more <i>equal</i> keys; a key can map to at most
+ * one value. However, more than one key can map to identical values. For primitive
+ * data types "equality" of keys is defined as identity (operator <tt>==</tt>).
+ * For maps using <tt>Object</tt> keys, the meaning of "equality" can be specified
+ * by the user upon instance construction. It can either be defined to be identity
+ * (operator <tt>==</tt>) or to be given by the method {@link java.lang.Object#equals(Object)}.
+ * Associations of kind <tt>(AnyType,Object)</tt> can be of the form <tt>(AnyKey,null)
+ * </tt>, i.e. values can be <tt>null</tt>.
+ * <p> The classes of this package make no guarantees as to the order of the elements
+ * returned by iterators; in particular, they do not guarantee that the order will
+ * remain constant over time.
+ * <h2></h2>
+ * <h4>Copying</h4>
+ * <p>
+ * <p>Any map can be copied. A copy is <i>equal</i> to the original but entirely
+ * independent of the original. So changes in the copy are not reflected in the
+ * original, and vice-versa.
+ * <h2>3. Package organization</h2>
+ * <p>For most primitive data types and for objects there exists a separate map version.
+ * All versions are just the same, except that they operate on different data types.
+ * Colt includes two kinds of implementations for maps: The two different implementations
+ * are tagged <b>Chained</b> and <b>Open</b>.
+ * Note: Chained is no longer included. Wherever it is mentioned it is of historic interest only.</p>
+ * <ul>
+ * <li><b>Chained</b> uses extendible separate chaining with chains holding unsorted
+ * dynamically linked collision lists.
+ * <li><b>Open</b> uses extendible open addressing with double hashing.
+ * </ul>
+ * <p>Class naming follows the schema <tt>&lt;Implementation&gt;&lt;KeyType&gt;&lt;ValueType&gt;HashMap</tt>.
+ * For example, a {@link org.apache.mahout.math.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
+ * associations and is implemented with open addressing. A {@link org.apache.mahout.math.map.OpenIntObjectHashMap}
+ * holds <tt>(int-->Object)</tt> associations and is implemented with open addressing.
+ * </p>
+ * <p>The classes for maps of a given (key,value) type are derived from a common
+ * abstract base class tagged <tt>Abstract&lt;KeyType&gt;&lt;ValueType&gt;</tt><tt>Map</tt>.
+ * For example, all maps operating on <tt>(int-->double)</tt> associations are
+ * derived from {@link org.apache.mahout.math.map.AbstractIntDoubleMap}, which in turn is derived
+ * from an abstract base class tying together all maps regardless of association
+ * type, {@link org.apache.mahout.math.set.AbstractSet}. The abstract base classes provide skeleton
+ * implementations for all but few methods. Experimental layouts (such as chaining,
+ * open addressing, extensible hashing, red-black-trees, etc.) can easily be implemented
+ * and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree
+ * view</a> to get the broad picture.</p>
+ * <h2>4. Example usage</h2>
+ * <TABLE>
+ * <TD CLASS="PRE">
+ * <PRE>
+ * int[]    keys   = {0    , 3     , 100000, 9   };
+ * double[] values = {100.0, 1000.0, 70.0  , 71.0};
+ * AbstractIntDoubleMap map = new OpenIntDoubleHashMap();
+ * // add several associations
+ * for (int i=0; i &lt; keys.length; i++) map.put(keys[i], values[i]);
+ * log.info("map="+map);
+ * log.info("size="+map.size());
+ * log.info(map.containsKey(3));
+ * log.info("get(3)="+map.get(3));
+ * log.info(map.containsKey(4));
+ * log.info("get(4)="+map.get(4));
+ * log.info(map.containsValue(71.0));
+ * log.info("keyOf(71.0)="+map.keyOf(71.0));
+ * // remove one association
+ * map.removeKey(3);
+ * log.info("\nmap="+map);
+ * log.info(map.containsKey(3));
+ * log.info("get(3)="+map.get(3));
+ * log.info(map.containsValue(1000.0));
+ * log.info("keyOf(1000.0)="+map.keyOf(1000.0));
+ * // clear
+ * map.clear();
+ * log.info("\nmap="+map);
+ * log.info("size="+map.size());
+ * </PRE>
+ * </TD>
+ * </TABLE>
+ * yields the following output
+ * <TABLE>
+ * <TD CLASS="PRE">
+ * <PRE>
+ * map=[0->100.0, 3->1000.0, 9->71.0, 100000->70.0]
+ * size=4
+ * true
+ * get(3)=1000.0
+ * false
+ * get(4)=0.0
+ * true
+ * keyOf(71.0)=9
+ * map=[0->100.0, 9->71.0, 100000->70.0]
+ * false
+ * get(3)=0.0
+ * false
+ * keyOf(1000.0)=-2147483648
+ * map=[]
+ * size=0
+ * </PRE>
+ * </TD>
+ * </TABLE>
+ * <h2> 5. Notes </h2>
+ * <p>
+ * Note that implementations are not synchronized.
+ * <p>
+ * Choosing efficient parameters for hash maps is not always easy.
+ * However, since parameters determine efficiency and memory requirements, here is a quick guide how to choose them.
+ * If your use case does not heavily operate on hash maps but uses them just because they provide
+ * convenient functionality, you can safely skip this section.
+ * For those of you who care, read on.
+ * <p>
+ * There are three parameters that can be customized upon map construction: <tt>initialCapacity</tt>,
+ * <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * The more memory one can afford, the faster a hash map.
+ * The hash map's capacity is the maximum number of associations that can be added without needing to allocate new
+ * internal memory.
+ * A larger capacity means faster adding, searching and removing.
+ * The <tt>initialCapacity</tt> corresponds to the capacity used upon instance construction.
+ * <p>
+ * The <tt>loadFactor</tt> of a hash map measures the degree of "fullness".
+ * It is given by the number of associations (<tt>size()</tt>)
+ * divided by the hash map capacity <tt>(0.0 &lt;= loadFactor &lt;= 1.0)</tt>.
+ * The more associations are added, the larger the loadFactor and the more hash map performance degrades.
+ * Therefore, when the loadFactor exceeds a customizable threshold (<tt>maxLoadFactor</tt>), the hash map is
+ * automatically grown.
+ * In such a way performance degradation can be avoided.
+ * Similarly, when the loadFactor falls below a customizable threshold (<tt>minLoadFactor</tt>), the hash map is
+ * automatically shrunk.
+ * In such a way excessive memory consumption can be avoided.
+ * Automatic resizing (both growing and shrinking) obeys the following invariant:
+ * <p>
+ * <tt>capacity * minLoadFactor &lt;= size() &lt;= capacity * maxLoadFactor</tt>
+ * <p> The term <tt>capacity * minLoadFactor</tt> is called the <i>low water mark</i>,
+ * <tt>capacity * maxLoadFactor</tt> is called the <i>high water mark</i>. In other
+ * words, the number of associations may vary within the water mark constraints.
+ * When it goes out of range, the map is automatically resized and memory consumption
+ * changes proportionally.
+ * <ul>
+ * <li>To tune for memory at the expense of performance, both increase <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * <li>To tune for performance at the expense of memory, both decrease <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * As a special case set <tt>minLoadFactor=0</tt> to avoid any automatic shrinking.
+ * </ul>
+ * Resizing large hash maps can be time consuming, <tt>O(size())</tt>, and should be avoided if possible (maintaining
+ * primes is not the reason).
+ * Unnecessary growing operations can be avoided if the number of associations is known before they are added, or can be
+ * estimated.<p>
+ * In such a case good parameters are as follows:
+ * <p>
+ * <i>For chaining:</i>
+ * <br>Set the <tt>initialCapacity = 1.4*expectedSize</tt> or greater.
+ * <br>Set the <tt>maxLoadFactor = 0.8</tt> or greater.
+ * <p>
+ * <i>For open addressing:</i>
+ * <br>Set the <tt>initialCapacity = 2*expectedSize</tt> or greater. Alternatively call <tt>ensureCapacity(...)</tt>.
+ * <br>Set the <tt>maxLoadFactor = 0.5</tt>.
+ * <br>Never set <tt>maxLoadFactor &gt; 0.55</tt>; open addressing exponentially slows down beyond that point.
+ * <p>
+ * In this way the hash map will never need to grow and still stay fast.
+ * It is never a good idea to set <tt>maxLoadFactor &lt; 0.1</tt>,
+ * because the hash map would grow too often.
+ * If it is entirely unknown how many associations the application will use,
+ * the default constructor should be used. The map will grow and shrink as needed.
+ * <p>
+ * <b>Comparison of chaining and open addressing</b>
+ * <p> Chaining is faster than open addressing, when assuming unconstrained memory
+ * consumption. Open addressing is more space efficient than chaining, because
+ * it does not create entry objects but uses primitive arrays which are considerably
+ * smaller. Entry objects consume significant amounts of memory compared to the
+ * information they actually hold. Open addressing also poses no problems to the
+ * garbage collector. In contrast, chaining can create millions of entry objects
+ * which are linked; a nightmare for any garbage collector. In addition, entry
+ * object creation is a bit slow. <br>
+ * Therefore, with the same amount of memory, or even less memory, hash maps with
+ * larger capacity can be maintained under open addressing, which yields smaller
+ * loadFactors, which in turn keeps performance competitive with chaining. In our
+ * benchmarks, using significantly less memory, open addressing usually is not
+ * more than 1.2-1.5 times slower than chaining.
+ * <p><b>Further readings</b>:
+ * <br>Knuth D., The Art of Computer Programming: Searching and Sorting, 3rd ed.
+ * <br>Griswold W., Townsend G., The Design and Implementation of Dynamic Hashing for Sets and Tables in Icon, Software -
+ * Practice and Experience, Vol. 23(4), 351-367 (April 1993).
+ * <br>Larson P., Dynamic hash tables, Comm. of the ACM, 31, (4), 1988.
+ * <p>
+ * <b>Performance:</b>
+ * <p>
+ * Time complexity:
+ * <br>The classes offer <i>expected</i> time complexity <tt>O(1)</tt> (i.e. constant time) for the basic operations
+ * <tt>put</tt>, <tt>get</tt>, <tt>removeKey</tt>, <tt>containsKey</tt> and <tt>size</tt>,
+ * assuming the hash function disperses the elements properly among the buckets.
+ * Otherwise, pathological cases, although highly improbable, can occur, degrading performance to <tt>O(N)</tt> in the
+ * worst case.
+ * Operations <tt>containsValue</tt> and <tt>keyOf</tt> are <tt>O(N)</tt>.
+ * <p>
+ * Memory requirements for <i>open addressing</i>:
+ * <br>worst case: <tt>memory [bytes] = (1/minLoadFactor) * size() * (1 + sizeOf(key) + sizeOf(value))</tt>.
+ * <br>best case: <tt>memory [bytes] = (1/maxLoadFactor) * size() * (1 + sizeOf(key) + sizeOf(value))</tt>.
+ * Where <tt>sizeOf(int) = 4</tt>, <tt>sizeOf(double) = 8</tt>, <tt>sizeOf(Object) = 4</tt>, etc.
+ * Thus, an <tt>OpenIntIntHashMap</tt> with minLoadFactor=0.25 and maxLoadFactor=0.5 and 1000000 associations uses
+ * between 17 MB and 34 MB.
+ * The same map with 1000 associations uses between 17 and 34 KB.
+ * <p>
+ * </BODY>
+ * </HTML>
+ */
+package org.apache.mahout.math.map;
\ No newline at end of file