You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2013/03/13 00:10:16 UTC
svn commit: r1455748 [1/2] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/...
Author: srowen
Date: Tue Mar 12 23:10:14 2013
New Revision: 1455748
URL: http://svn.apache.org/r1455748
Log:
More changes from code inspection / findbugs / pmd
Added:
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
Removed:
mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html
mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package.html
mahout/trunk/math/src/main/java/org/apache/mahout/math/package.html
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/random/SyntheticVariable.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/solver/ConjugateGradientSolver.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedSparseBinaryMatrixTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/QRDecompositionTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Tue Mar 12 23:10:14 2013
@@ -202,10 +202,9 @@ public class ParallelALSFactorizationJob
static class ItemRatingVectorsMapper extends Mapper<LongWritable,Text,IntWritable,VectorWritable> {
- private IntWritable itemIDWritable = new IntWritable();
- private VectorWritable ratingsWritable = new VectorWritable(true);
-
- private Vector ratings = new SequentialAccessSparseVector(Integer.MAX_VALUE, 1);
+ private final IntWritable itemIDWritable = new IntWritable();
+ private final VectorWritable ratingsWritable = new VectorWritable(true);
+ private final Vector ratings = new SequentialAccessSparseVector(Integer.MAX_VALUE, 1);
@Override
protected void map(LongWritable offset, Text line, Context ctx) throws IOException, InterruptedException {
@@ -251,7 +250,7 @@ public class ParallelALSFactorizationJob
private int numFeatures;
private OpenIntObjectHashMap<Vector> UorM;
- private VectorWritable uiOrmjWritable = new VectorWritable();
+ private final VectorWritable uiOrmjWritable = new VectorWritable();
@Override
protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
@@ -287,7 +286,7 @@ public class ParallelALSFactorizationJob
private ImplicitFeedbackAlternatingLeastSquaresSolver solver;
- private VectorWritable uiOrmjWritable = new VectorWritable();
+ private final VectorWritable uiOrmjWritable = new VectorWritable();
@Override
protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
@@ -316,9 +315,9 @@ public class ParallelALSFactorizationJob
static class AverageRatingMapper extends Mapper<IntWritable,VectorWritable,IntWritable,VectorWritable> {
- private IntWritable firstIndex = new IntWritable(0);
- private Vector featureVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
- private VectorWritable featureVectorWritable = new VectorWritable();
+ private final IntWritable firstIndex = new IntWritable(0);
+ private final Vector featureVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
+ private final VectorWritable featureVectorWritable = new VectorWritable();
@Override
protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Tue Mar 12 23:10:14 2013
@@ -107,7 +107,7 @@ public class RecommenderJob extends Abst
private int recommendationsPerUser;
private float maxRating;
- private RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
+ private final RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
@@ -123,7 +123,7 @@ public class RecommenderJob extends Abst
}
// we can use a simple dot product computation, as both vectors are dense
- private double dot(Vector x, Vector y) {
+ private static double dot(Vector x, Vector y) {
int numFeatures = x.size();
double sum = 0;
for (int n = 0; n < numFeatures; n++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java Tue Mar 12 23:10:14 2013
@@ -40,7 +40,7 @@ public class TopItemQueue extends Priori
while (size() > 0) {
MutableRecommendedItem topItem = pop();
// filter out "sentinel" objects necessary for maintaining an efficient priority queue
- if (topItem.getItemID() != TopItemQueue.SENTINEL_ID) {
+ if (topItem.getItemID() != SENTINEL_ID) {
recommendedItems.add(topItem);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java Tue Mar 12 23:10:14 2013
@@ -242,7 +242,7 @@ public final class ImplicitLinearRegress
/* calculating cosine similarity to determine when to stop the algorithm, this could be used to detect convergence */
double cosine = aTb / (Math.sqrt(normA) * Math.sqrt(normB));
if (Double.isNaN(cosine)) {
- log.info("Cosine similarity is NaN, recomputeUserFeatures=" + recomputeUserFeatures + " id=" + id);
+ log.info("Cosine similarity is NaN, recomputeUserFeatures={} id={}", recomputeUserFeatures, id);
} else {
avrChange.addDatum(cosine);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java Tue Mar 12 23:10:14 2013
@@ -28,17 +28,17 @@ import com.google.common.base.Preconditi
* Defines the interface for classifiers that take a vector as input. This is
* implemented as an abstract class so that it can implement a number of handy
* convenience methods related to classification of vectors.
- *
+ *
* <p>
* A classifier takes an input vector and calculates the scores (usually
- * probabilities) that the input vector belongs to one of <code>n</code>
- * categories. In <code>AbstractVectorClassifier</code> each category is denoted
- * by an integer <code>c</code> between <code>0</code> and <code>n-1</code>
+ * probabilities) that the input vector belongs to one of {@code n}
+ * categories. In {@code AbstractVectorClassifier} each category is denoted
+ * by an integer {@code c} between {@code 0} and {@code n-1}
* (inclusive).
- *
+ *
* <p>
* New users should start by looking at {@link #classifyFull} (not {@link #classify}).
- *
+ *
*/
public abstract class AbstractVectorClassifier {
@@ -46,26 +46,26 @@ public abstract class AbstractVectorClas
public static final double MIN_LOG_LIKELIHOOD = -100.0;
/**
- * Returns the number of categories that a target variable can be assigned to.
- * A vector classifier will encode it's output as an integer from
- * <code>0</code> to <code>numCategories()-1</code> (inclusive).
- *
- * @return The number of categories.
- */
+ * Returns the number of categories that a target variable can be assigned to.
+ * A vector classifier will encode it's output as an integer from
+ * {@code 0} to {@code numCategories()-1} (inclusive).
+ *
+ * @return The number of categories.
+ */
public abstract int numCategories();
/**
- * Compute and return a vector containing <code>n-1</code> scores, where
- * <code>n</code> is equal to <code>numCategories()</code>, given an input
- * vector <code>instance</code>. Higher scores indicate that the input vector
+ * Compute and return a vector containing {@code n-1} scores, where
+ * {@code n} is equal to {@code numCategories()}, given an input
+ * vector {@code instance}. Higher scores indicate that the input vector
* is more likely to belong to that category. The categories are denoted by
- * the integers <code>0</code> through <code>n-1</code> (inclusive), and the
+ * the integers {@code 0} through {@code n-1} (inclusive), and the
* scores in the returned vector correspond to categories 1 through
- * <code>n-1</code> (leaving out category 0). It is assumed that the score for
+ * {@code n-1} (leaving out category 0). It is assumed that the score for
* category 0 is one minus the sum of the scores in the returned vector.
- *
+ *
* @param instance A feature vector to be classified.
- * @return A vector of probabilities in 1 of <code>n-1</code> encoding.
+ * @return A vector of probabilities in 1 of {@code n-1} encoding.
*/
public abstract Vector classify(Vector instance);
@@ -75,7 +75,7 @@ public abstract class AbstractVectorClas
* is just the linear part of the classification.
*
* <p>
- * The implementation of this method provided by {@link AbstractVectorClassifier} throws an
+ * The implementation of this method provided by {@code AbstractVectorClassifier} throws an
* {@link UnsupportedOperationException}. Your subclass must explicitly override this method to support
* this operation.
*
@@ -100,33 +100,33 @@ public abstract class AbstractVectorClas
public abstract double classifyScalar(Vector instance);
/**
- * Computes and returns a vector containing <code>n</code> scores, where
- * <code>n</code> is <code>numCategories()</code>, given an input vector
- * <code>instance</code>. Higher scores indicate that the input vector is more
+ * Computes and returns a vector containing {@code n} scores, where
+ * {@code n} is {@code numCategories()}, given an input vector
+ * {@code instance}. Higher scores indicate that the input vector is more
* likely to belong to the corresponding category. The categories are denoted
- * by the integers <code>0</code> through <code>n-1</code> (inclusive).
- *
+ * by the integers {@code 0} through {@code n-1} (inclusive).
+ *
* <p>
* Using this method it is possible to classify an input vector, for example,
* by selecting the category with the largest score. If
- * <code>classifier</code> is an instance of
- * <code>AbstractVectorClassifier</code> and <code>input</code> is a
- * <code>Vector</code> of features describing an element to be classified,
- * then the following code could be used to classify <code>input</code>.<br>
- * <code>
+ * {@code classifier} is an instance of
+ * {@code AbstractVectorClassifier} and {@code input} is a
+ * {@code Vector} of features describing an element to be classified,
+ * then the following code could be used to classify {@code input}.<br>
+ * {@code
* Vector scores = classifier.classifyFull(input);<br>
* int assignedCategory = scores.maxValueIndex();<br>
- * </code> Here <code>assignedCategory</code> is the index of the category
+ * } Here {@code assignedCategory} is the index of the category
* with the maximum score.
- *
+ *
* <p>
- * If an <code>n-1</code> encoding is acceptable, and allocation performance
+ * If an {@code n-1} encoding is acceptable, and allocation performance
* is an issue, then the {@link #classify(Vector)} method is probably better
* to use.
- *
+ *
* @see #classify(Vector)
* @see #classifyFull(Vector r, Vector instance)
- *
+ *
* @param instance A vector of features to be classified.
* @return A vector of probabilities, one for each category.
*/
@@ -135,29 +135,29 @@ public abstract class AbstractVectorClas
}
/**
- * Computes and returns a vector containing <code>n</code> scores, where
- * <code>n</code> is <code>numCategories()</code>, given an input vector
- * <code>instance</code>. Higher scores indicate that the input vector is more
+ * Computes and returns a vector containing {@code n} scores, where
+ * {@code n} is {@code numCategories()}, given an input vector
+ * {@code instance}. Higher scores indicate that the input vector is more
* likely to belong to the corresponding category. The categories are denoted
- * by the integers <code>0</code> through <code>n-1</code> (inclusive). The
+ * by the integers {@code 0} through {@code n-1} (inclusive). The
* main difference between this method and {@link #classifyFull(Vector)} is
* that this method allows a user to provide a previously allocated
- * <code>Vector r</code> to store the returned scores.
- *
+ * {@code Vector r} to store the returned scores.
+ *
* <p>
* Using this method it is possible to classify an input vector, for example,
* by selecting the category with the largest score. If
- * <code>classifier</code> is an instance of
- * <code>AbstractVectorClassifier</code>, <code>result</code> is a non-null
- * <code>Vector</code>, and <code>input</code> is a <code>Vector</code> of
+ * {@code classifier} is an instance of
+ * {@code AbstractVectorClassifier}, {@code result} is a non-null
+ * {@code Vector}, and {@code input} is a {@code Vector} of
* features describing an element to be classified, then the following code
- * could be used to classify <code>input</code>.<br>
- * <code>
+ * could be used to classify {@code input}.<br>
+ * {@code
* Vector scores = classifier.classifyFull(result, input); // Notice that scores == result<br>
* int assignedCategory = scores.maxValueIndex();<br>
- * </code> Here <code>assignedCategory</code> is the index of the category
+ * } Here {@code assignedCategory} is the index of the category
* with the maximum score.
- *
+ *
* @param r Where to put the results.
* @param instance A vector of features to be classified.
* @return A vector of scores/probabilities, one for each category.
@@ -171,10 +171,10 @@ public abstract class AbstractVectorClas
/**
* Returns n-1 probabilities, one for each categories 1 through
- * <code>n-1</code>, for each row of a matrix, where <code>n</code> is equal
- * to <code>numCategories()</code>. The probability of the missing 0-th
+ * {@code n-1}, for each row of a matrix, where {@code n} is equal
+ * to {@code numCategories()}. The probability of the missing 0-th
* category is 1 - rowSum(this result).
- *
+ *
* @param data The matrix whose rows are the input vectors to classify
* @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
*/
@@ -187,8 +187,8 @@ public abstract class AbstractVectorClas
}
/**
- * Returns a matrix where the rows of the matrix each contain <code>n</code> probabilities, one for each category.
- *
+ * Returns a matrix where the rows of the matrix each contain {@code n} probabilities, one for each category.
+ *
* @param data The matrix whose rows are the input vectors to classify
* @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
*/
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java Tue Mar 12 23:10:14 2013
@@ -145,8 +145,7 @@ public class DecisionTreeBuilder impleme
// variance is compared with minimum variance
if ((var / data.size()) < minVariance) {
- log.debug("variance(" + (var / data.size()) + ") < minVariance(" + minVariance + ") Leaf(" +
- (sum / data.size()) + ')');
+ log.debug("variance({}) < minVariance({}) Leaf({})", var / data.size(), minVariance, sum / data.size());
return new Leaf(sum / data.size());
}
} else {
@@ -210,8 +209,7 @@ public class DecisionTreeBuilder impleme
return new Leaf(label);
}
- log.debug("best split attr:" + best.getAttr() + ", split:" + best.getSplit() + ", ig:"
- + best.getIg());
+ log.debug("best split attr:{}, split:{}, ig:{}", best.getAttr(), best.getSplit(), best.getIg());
boolean alreadySelected = selected[best.getAttr()];
if (alreadySelected) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Tue Mar 12 23:10:14 2013
@@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector.Ele
/**
* Class implementing the Naive Bayes Classifier Algorithm. Note that this class
- * supports {@link #classifyFull}, but not <code>classify</code> or
- * <code>classifyScalar</code>. The reason that these two methods are not
+ * supports {@link #classifyFull}, but not {@code classify} or
+ * {@code classifyScalar}. The reason that these two methods are not
* supported is because the scores computed by a NaiveBayesClassifier do not
* represent probabilities.
*/
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java Tue Mar 12 23:10:14 2013
@@ -140,7 +140,7 @@ public class CrossFoldLearner extends Ab
}
}
- private long mod(long x, int y) {
+ private static long mod(long x, int y) {
long r = x % y;
return r < 0 ? r + y : r;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Tue Mar 12 23:10:14 2013
@@ -343,7 +343,7 @@ public class CVB0Driver extends Abstract
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "Calculating perplexity for " + modelPath;
- log.info("About to run: " + jobName);
+ log.info("About to run: {}", jobName);
Job job = new Job(conf, jobName);
job.setJarByClass(CachingCVB0PerplexityMapper.class);
job.setMapperClass(CachingCVB0PerplexityMapper.class);
@@ -423,7 +423,7 @@ public class CVB0Driver extends Abstract
private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
throws IOException, InterruptedException, ClassNotFoundException {
String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
- log.info("About to run: " + jobName);
+ log.info("About to run: {}", jobName);
Job job = new Job(conf, jobName);
job.setJarByClass(CVB0Driver.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
@@ -441,7 +441,7 @@ public class CVB0Driver extends Abstract
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
throws IOException, ClassNotFoundException, InterruptedException {
String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
- log.info("About to run: " + jobName);
+ log.info("About to run: {}", jobName);
Job job = new Job(conf, jobName);
job.setMapperClass(CVB0DocInferenceMapper.class);
job.setNumReduceTasks(0);
@@ -483,7 +483,7 @@ public class CVB0Driver extends Abstract
int iterationNumber = 1;
Path iterationPath = modelPath(modelTempDir, iterationNumber);
while (fs.exists(iterationPath) && iterationNumber <= maxIterations) {
- log.info("Found previous state: " + iterationPath);
+ log.info("Found previous state: {}", iterationPath);
iterationNumber++;
iterationPath = modelPath(modelTempDir, iterationNumber);
}
@@ -495,7 +495,7 @@ public class CVB0Driver extends Abstract
throws IOException, ClassNotFoundException, InterruptedException {
String jobName = String.format("Iteration %d of %d, input path: %s",
iterationNumber, maxIterations, modelInput);
- log.info("About to run: " + jobName);
+ log.info("About to run: {}", jobName);
Job job = new Job(conf, jobName);
job.setJarByClass(CVB0Driver.class);
job.setMapperClass(CachingCVB0Mapper.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Tue Mar 12 23:10:14 2013
@@ -225,12 +225,12 @@ public class InMemoryCollapsedVariationa
while (iter < minIter) {
trainDocuments(testFraction);
if (verbose) {
- log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
+ log.info("model after: {}: {}", iter, modelTrainer.getReadModel());
}
- log.info("iteration " + iter + " complete");
+ log.info("iteration {} complete", iter);
oldPerplexity = modelTrainer.calculatePerplexity(corpusWeights, docTopicCounts,
testFraction);
- log.info(oldPerplexity + " = perplexity");
+ log.info("{} = perplexity", oldPerplexity);
iter++;
}
double newPerplexity = 0;
@@ -238,14 +238,14 @@ public class InMemoryCollapsedVariationa
while (iter < maxIterations && fractionalChange > minFractionalErrorChange) {
trainDocuments();
if (verbose) {
- log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
+ log.info("model after: {}: {}", iter, modelTrainer.getReadModel());
}
newPerplexity = modelTrainer.calculatePerplexity(corpusWeights, docTopicCounts,
testFraction);
- log.info(newPerplexity + " = perplexity");
+ log.info("{} = perplexity", newPerplexity);
iter++;
fractionalChange = Math.abs(newPerplexity - oldPerplexity) / oldPerplexity;
- log.info(fractionalChange + " = fractionalChange");
+ log.info("{} = fractionalChange", fractionalChange);
oldPerplexity = newPerplexity;
}
if (iter < maxIterations) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java Tue Mar 12 23:10:14 2013
@@ -96,7 +96,7 @@ public class ModelTrainer {
}
public void start() {
- log.info("Starting training threadpool with " + numTrainThreads + " threads");
+ log.info("Starting training threadpool with {} threads", numTrainThreads);
workQueue = new ArrayBlockingQueue<Runnable>(numTrainThreads * 10);
threadPool = new ThreadPoolExecutor(numTrainThreads, numTrainThreads, 0, TimeUnit.SECONDS,
workQueue);
@@ -169,10 +169,10 @@ public class ModelTrainer {
(System.nanoTime() - start) /(1.0e6 * document.getNumNondefaultElements());
if (i % 100 == 0) {
long time = System.nanoTime() - startTime;
- log.debug("trained " + i + " documents in " + (time / 1.0e6) + "ms");
+ log.debug("trained {} documents in {}ms", i, time / 1.0e6);
if (i % 500 == 0) {
Arrays.sort(times);
- log.debug("training took median " + times[times.length / 2] + "ms per token-instance");
+ log.debug("training took median {}ms per token-instance", times[times.length / 2]);
}
}
}
@@ -211,7 +211,7 @@ public class ModelTrainer {
numTopics, numTerms, true), numDocTopicIters));
return;
} catch (InterruptedException e) {
- log.warn("Interrupted waiting to submit document to work queue: " + document, e);
+ log.warn("Interrupted waiting to submit document to work queue: {}", document, e);
}
}
}
@@ -239,11 +239,11 @@ public class ModelTrainer {
log.warn("Threadpool timed out on await termination - jobs still running!");
}
long newTime = System.nanoTime();
- log.info("threadpool took: " + (newTime - startTime) / 1.0e6 + "ms");
+ log.info("threadpool took: {}ms", (newTime - startTime) / 1.0e6);
startTime = newTime;
writeModel.awaitTermination();
newTime = System.nanoTime();
- log.info("writeModel.awaitTermination() took " + (newTime - startTime) / 1.0e6 + "ms");
+ log.info("writeModel.awaitTermination() took {}ms", (newTime - startTime) / 1.0e6);
TopicModel tmpModel = writeModel;
writeModel = readModel;
readModel = tmpModel;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Tue Mar 12 23:10:14 2013
@@ -21,11 +21,10 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.util.Version;
import org.apache.mahout.common.ClassUtils;
-/**
- *
- *
- **/
-public class AnalyzerUtils {
+public final class AnalyzerUtils {
+
+ private AnalyzerUtils() {
+ }
/**
* Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in parameters
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsReducer.java Tue Mar 12 23:10:14 2013
@@ -28,7 +28,7 @@ import java.io.IOException;
public class MergeVectorsReducer extends
Reducer<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
- private VectorWritable result = new VectorWritable();
+ private final VectorWritable result = new VectorWritable();
@Override
public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context ctx)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Tue Mar 12 23:10:14 2013
@@ -335,7 +335,7 @@ public final class FPTree {
int attribute = node.attribute();
if (items == null) {
// at root
- if (!(node == root)) {
+ if (node != root) {
throw new IllegalStateException();
}
items = new IntArrayList();
@@ -352,7 +352,7 @@ public final class FPTree {
qTree.accumulate(items, toAdd);
added += toAdd;
}
- if (!(node == root)) {
+ if (node != root) {
int lastIdx = items.size() - 1;
if (items.get(lastIdx) != attribute) {
throw new IllegalStateException();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Tue Mar 12 23:10:14 2013
@@ -96,7 +96,7 @@ public class VectorDistanceSimilarityJob
String maxDistanceArg = getOption(MAX_DISTANCE);
if (maxDistanceArg != null) {
maxDistance = Double.parseDouble(maxDistanceArg);
- Preconditions.checkArgument(maxDistance > 0d, "value for " + MAX_DISTANCE + " must be greater than zero");
+ Preconditions.checkArgument(maxDistance > 0.0d, "value for " + MAX_DISTANCE + " must be greater than zero");
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java Tue Mar 12 23:10:14 2013
@@ -62,7 +62,7 @@ import org.apache.mahout.math.hadoop.sto
* reducer.
*/
@SuppressWarnings("deprecation")
-public class ABtDenseOutJob {
+public final class ABtDenseOutJob {
public static final String PROP_BT_PATH = "ssvd.Bt.path";
public static final String PROP_BT_BROADCAST = "ssvd.Bt.broadcast";
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java Tue Mar 12 23:10:14 2013
@@ -21,7 +21,7 @@ import org.apache.lucene.search.similari
//TODO: add a new class that supports arbitrary Lucene similarity implementations
public class TFIDF implements Weight {
- private DefaultSimilarity sim = new DefaultSimilarity();
+ private final DefaultSimilarity sim = new DefaultSimilarity();
public TFIDF() {
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java Tue Mar 12 23:10:14 2013
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
import org.apache.mahout.cf.taste.impl.TasteTestCase;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.MahoutTestCase;
import org.junit.Test;
import java.util.List;
@@ -28,33 +29,33 @@ public class TopItemQueueTest extends Ta
@Test
public void topK() {
- float[] ratings = { .5f, .6f, .7f, 2f, 0f };
+ float[] ratings = {0.5f, 0.6f, 0.7f, 2.0f, 0.0f};
List<RecommendedItem> topItems = findTop(ratings, 2);
assertEquals(2, topItems.size());
- assertEquals(3l, topItems.get(0).getItemID());
- assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
- assertEquals(2l, topItems.get(1).getItemID());
- assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+ assertEquals(3L, topItems.get(0).getItemID());
+ assertEquals(2.0f, topItems.get(0).getValue(), MahoutTestCase.EPSILON);
+ assertEquals(2L, topItems.get(1).getItemID());
+ assertEquals(0.7f, topItems.get(1).getValue(), MahoutTestCase.EPSILON);
}
@Test
public void topKInputSmallerThanK() {
- float[] ratings = {.7f, 2f};
+ float[] ratings = {0.7f, 2.0f};
List<RecommendedItem> topItems = findTop(ratings, 3);
assertEquals(2, topItems.size());
- assertEquals(1l, topItems.get(0).getItemID());
- assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
- assertEquals(0l, topItems.get(1).getItemID());
- assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+ assertEquals(1L, topItems.get(0).getItemID());
+ assertEquals(2.0f, topItems.get(0).getValue(), MahoutTestCase.EPSILON);
+ assertEquals(0L, topItems.get(1).getItemID());
+ assertEquals(0.7f, topItems.get(1).getValue(), MahoutTestCase.EPSILON);
}
- private List<RecommendedItem> findTop(float[] ratings, int k) {
+ private static List<RecommendedItem> findTop(float[] ratings, int k) {
TopItemQueue queue = new TopItemQueue(k);
for (int item = 0; item < ratings.length; item++) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java Tue Mar 12 23:10:14 2013
@@ -94,6 +94,7 @@ public final class NodeTest extends Maho
assertEquals(node, readNode());
}
+ @Test
public void testCategoricalNode() throws Exception {
Node node = new CategoricalNode(rng.nextInt(), new double[]{rng.nextDouble(),
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java Tue Mar 12 23:10:14 2013
@@ -210,8 +210,8 @@ public final class OnlineLogisticRegress
// select training and test data
List<Integer> train = order.subList(0, 100);
List<Integer> test = order.subList(100, 150);
- logger.warn("Training set = " + train);
- logger.warn("Test set = " + test);
+ logger.warn("Training set = {}", train);
+ logger.warn("Test set = {}", test);
// now train many times and collect information on accuracy each time
int[] correct = new int[test.size() + 1];
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Tue Mar 12 23:10:14 2013
@@ -68,6 +68,7 @@ public final class DummyStatusReporter e
public void setStatus(String status) {
}
+ @Override
public float getProgress() {
return 0.0f;
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestVectorDistanceSimilarityJob.java Tue Mar 12 23:10:14 2013
@@ -226,7 +226,7 @@ public class TestVectorDistanceSimilarit
}
}
- private List<VectorWritable> getPointsWritable(double[][] raw) {
+ private static List<VectorWritable> getPointsWritable(double[][] raw) {
List<VectorWritable> points = Lists.newArrayList();
for (double[] fr : raw) {
Vector vec = new RandomAccessSparseVector(fr.length);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java Tue Mar 12 23:10:14 2013
@@ -59,13 +59,13 @@ public class LocalSSVDSolverDenseTest ex
runSSVDSolver(1);
}
- /*
- * remove from active tests to save time.
- */
- /* @Test */
+ // remove from active tests to save time.
+ /*
+ @Test
public void testSSVDSolverPowerIterations2() throws IOException {
runSSVDSolver(2);
}
+ */
public void runSSVDSolver(int q) throws IOException {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Tue Mar 12 23:10:14 2013
@@ -56,13 +56,13 @@ public class LocalSSVDSolverSparseSequen
private static final double s_epsilon = 1.0E-10d;
- /*
- * removing from tests to reduce test running time
- */
- /* @Test */
+ // removing from tests to reduce test running time
+ /*
+ @Test
public void testSSVDSolverSparse() throws IOException {
runSSVDSolver(0);
}
+ */
@Test
public void testSSVDSolverPowerIterations1() throws IOException {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java Tue Mar 12 23:10:14 2013
@@ -35,7 +35,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.stochasticsvd.qr.GramSchmidt;
-public class SSVDTestsHelper {
+public final class SSVDTestsHelper {
private SSVDTestsHelper() {
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/EstimateConverter.java Tue Mar 12 23:10:14 2013
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.examp
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class EstimateConverter {
+public final class EstimateConverter {
private static final Logger log = LoggerFactory.getLogger(EstimateConverter.class);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java Tue Mar 12 23:10:14 2013
@@ -44,7 +44,7 @@ import java.io.OutputStream;
* needs at least 6-7GB of memory, tested with -Xms6700M -Xmx6700M
*
*/
-public class Track1SVDRunner {
+public final class Track1SVDRunner {
private static final Logger log = LoggerFactory.getLogger(Track1SVDRunner.class);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java Tue Mar 12 23:10:14 2013
@@ -356,7 +356,7 @@ public class DisplayMinHash extends Disp
}
list.add(next.getSecond().get());
}
- log.info("Loaded: " + clusters.size() + " clusters");
+ log.info("Loaded: {} clusters", clusters.size());
}
private static void runMinHash(Configuration conf, Path samples, Path output)
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Tue Mar 12 23:10:14 2013
@@ -47,7 +47,7 @@ public final class MailArchivesClusterin
// HTML tags, and Java keywords as many of the messages in the archives
// are subversion check-in notifications
-private static CharArraySet stopSet = new CharArraySet(LUCENE_VERSION, Arrays.asList(
+ private static final CharArraySet stopSet = new CharArraySet(LUCENE_VERSION, Arrays.asList(
"3d","7bit","a0","about","above","abstract","across","additional","after",
"afterwards","again","against","align","all","almost","alone","along",
"already","also","although","always","am","among","amongst","amoungst",
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Tue Mar 12 23:10:14 2013
@@ -45,7 +45,7 @@ public class WikipediaAnalyzer extends S
Tokenizer tokenizer = new WikipediaTokenizer(reader);
TokenStream result = new StandardFilter(Version.LUCENE_41, tokenizer);
result = new LowerCaseFilter(Version.LUCENE_41, result);
- result = new StopFilter(Version.LUCENE_41, result, super.getStopwordSet());
+ result = new StopFilter(Version.LUCENE_41, result, getStopwordSet());
return new TokenStreamComponents(tokenizer, result);
}
}
\ No newline at end of file
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Tue Mar 12 23:10:14 2013
@@ -114,7 +114,7 @@ public final class VectorDumper extends
boolean sortVectors = hasOption("sortVectors");
boolean quiet = hasOption("quiet");
if (!quiet) {
- log.info("Sort? " + sortVectors);
+ log.info("Sort? {}", sortVectors);
}
String[] dictionary = null;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Tue Mar 12 23:10:14 2013
@@ -88,8 +88,8 @@ public class ClusterLabels {
private String idField;
private final Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
private String output;
- private int minNumIds;
- private int maxLabels;
+ private final int minNumIds;
+ private final int maxLabels;
public ClusterLabels(Path seqFileDir,
Path pointsDir,
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Tue Mar 12 23:10:14 2013
@@ -22,7 +22,6 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.TermInfo;
import org.apache.mahout.vectorizer.Weight;
-import java.io.IOException;
import java.util.Iterator;
/**
@@ -75,10 +74,6 @@ public final class LuceneIterable implem
@Override
public Iterator<Vector> iterator() {
- try {
- return new LuceneIterator(indexReader, idField, field, terminfo, weight, normPower, maxPercentErrorDocs);
- } catch (IOException e) {
- throw new IllegalStateException(e);
- }
+ return new LuceneIterator(indexReader, idField, field, terminfo, weight, normPower, maxPercentErrorDocs);
}
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java Tue Mar 12 23:10:14 2013
@@ -58,7 +58,7 @@ public final class LuceneIterator extend
private final Bump125 bump = new Bump125();
private long nextLogRecord = bump.increment();
private int skippedErrorMessages = 0;
- private Weight weight;
+ private final Weight weight;
/**
* Produce a LuceneIterable that can create the Vector plus normalize it.
@@ -76,7 +76,7 @@ public final class LuceneIterator extend
String field,
TermInfo terminfo,
Weight weight,
- double normPower) throws IOException {
+ double normPower) {
this(indexReader, idField, field, terminfo, weight, normPower, 0.0);
}
@@ -97,7 +97,7 @@ public final class LuceneIterator extend
TermInfo terminfo,
Weight weight,
double normPower,
- double maxPercentErrorDocs) throws IOException {
+ double maxPercentErrorDocs) {
// term docs(null) is a better way of iterating all the docs in Lucene
Preconditions.checkArgument(normPower == LuceneIterable.NO_NORMALIZING || normPower >= 0,
"If specified normPower must be nonnegative", normPower);
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Mar 12 23:10:14 2013
@@ -24,8 +24,6 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -45,18 +43,12 @@ import org.apache.mahout.common.distance
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.kernel.IKernelProfile;
import org.apache.mahout.common.kernel.TriangularKernelProfile;
-import org.apache.mahout.math.DenseMatrix;
-import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.hadoop.DistributedRowMatrix;
-import org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver;
-import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
import org.apache.mahout.utils.clustering.ClusterDumper;
import org.apache.mahout.utils.vectors.TermEntry;
import org.apache.mahout.utils.vectors.TermInfo;
@@ -287,7 +279,8 @@ public final class TestClusterDumper ext
clusterDumper.printClusters(termDictionary);
}
- // @Test
+ /*
+ @Test
public void testKmeansSVD() throws Exception {
DistanceMeasure measure = new EuclideanDistanceMeasure();
Path output = getTestTempDirPath("output");
@@ -351,7 +344,7 @@ public final class TestClusterDumper ext
clusterDumper.printClusters(termDictionary);
}
- // @Test
+ @Test
public void testKmeansDSVD() throws Exception {
DistanceMeasure measure = new EuclideanDistanceMeasure();
Path output = getTestTempDirPath("output");
@@ -393,7 +386,7 @@ public final class TestClusterDumper ext
clusterDumper.printClusters(termDictionary);
}
- // @Test
+ @Test
public void testKmeansDSVD2() throws Exception {
DistanceMeasure measure = new EuclideanDistanceMeasure();
Path output = getTestTempDirPath("output");
@@ -437,4 +430,5 @@ public final class TestClusterDumper ext
kmeansOutput, 10), new Path(kmeansOutput, "clusteredPoints"));
clusterDumper.printClusters(termDictionary);
}
+ */
}
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java Tue Mar 12 23:10:14 2013
@@ -19,12 +19,11 @@ package org.apache.mahout.utils.vectors.
import org.apache.mahout.utils.MahoutTestCase;
import org.junit.Test;
-public class ARFFTypeTest extends MahoutTestCase{
+public final class ARFFTypeTest extends MahoutTestCase {
@Test
public void removeQuotes() {
-
- assertEquals(null, ARFFType.removeQuotes(null));
+ assertNull(ARFFType.removeQuotes(null));
assertEquals("", ARFFType.removeQuotes("\"\""));
assertEquals("", ARFFType.removeQuotes("''"));
assertEquals("", ARFFType.removeQuotes(""));
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java Tue Mar 12 23:10:14 2013
@@ -22,7 +22,7 @@ package org.apache.mahout.math;
* <p>
* See also http://github.com/yonik/java_util for future updates to this file.
*/
-public class MurmurHash3 {
+public final class MurmurHash3 {
private MurmurHash3() {
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/OldQRDecomposition.java Tue Mar 12 23:10:14 2013
@@ -132,6 +132,7 @@ public class OldQRDecomposition implemen
*
* @return <tt>Q</tt>
*/
+ @Override
public Matrix getQ() {
int columns = Math.min(originalColumns, originalRows);
Matrix q = qr.like(originalRows, columns);
@@ -154,6 +155,7 @@ public class OldQRDecomposition implemen
*
* @return <tt>R</tt>
*/
+ @Override
public Matrix getR() {
int rows = Math.min(originalRows, originalColumns);
Matrix r = qr.like(rows, originalColumns);
@@ -176,6 +178,7 @@ public class OldQRDecomposition implemen
*
* @return true if <tt>R</tt>, and hence <tt>A</tt>, has full rank.
*/
+ @Override
public boolean hasFullRank() {
for (int j = 0; j < originalColumns; j++) {
if (rDiag.getQuick(j) == 0) {
@@ -192,6 +195,7 @@ public class OldQRDecomposition implemen
* @return <tt>X</tt> that minimizes the two norm of <tt>Q*R*X - B</tt>.
* @throws IllegalArgumentException if <tt>B.rows() != A.rows()</tt>.
*/
+ @Override
public Matrix solve(Matrix B) {
if (B.numRows() != originalRows) {
throw new IllegalArgumentException("Matrix row dimensions must agree.");
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java Tue Mar 12 23:10:14 2013
@@ -41,7 +41,8 @@ import java.util.Locale;
*/
public class QRDecomposition implements QR {
- private final Matrix q, r;
+ private final Matrix q;
+ private final Matrix r;
private final boolean fullRank;
private final int rows;
private final int columns;
Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java?rev=1455748&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java Tue Mar 12 23:10:14 2013
@@ -0,0 +1,143 @@
+/**
+ * <HTML>
+ * <BODY>
+ * Resizable lists holding objects or primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc. For non-resizable lists (1-dimensional matrices) see
+ * package <code>org.apache.mahout.math.matrix</code>.<p></p>
+ * <h1><a name="Overview"></a>Getting Started</h1>
+ * <h2>1. Overview</h2>
+ * <p>The list package offers flexible object oriented abstractions modelling dynamically
+ * resizing lists holding objects or primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc. It is designed to be scalable in terms of performance
+ * and memory requirements.</p>
+ * <p>Features include: </p>
+ * <p></p>
+ * <ul>
+ * <li>Lists operating on objects as well as all primitive data types such as <tt>int</tt>,
+ * <tt>double</tt>, etc.
+ * </li>
+ * <li>Compact representations</li>
+ * <li>A number of general purpose list operations including: adding, inserting,
+ * removing, iterating, searching, sorting, extracting ranges and copying. All
+ * operations are designed to perform well on mass data.
+ * </li>
+ * <li>Support for quick access to list elements. This is achieved by bounds-checking
+ * and non-bounds-checking accessor methods as well as zero-copy transformations
+ * to primitive arrays such as <tt>int[]</tt>, <tt>double[]</tt>, etc.
+ * </li>
+ * <li>Allows high-level algorithms to be used on primitive data types without any
+ * space and time overhead. Operations on primitive arrays, Colt lists and JAL
+ * algorithms can freely be mixed at zero copy overhead.
+ * </li>
+ * </ul>
+ * <p>File-based I/O can be achieved through the standard Java built-in serialization
+ * mechanism. All classes implement the {@link java.io.Serializable} interface.
+ * However, the toolkit is entirely decoupled from advanced I/O. It provides data
+ * structures and algorithms only.
+ * <p> This toolkit borrows concepts and terminology from the Javasoft <a
+ * href="http://www.javasoft.com/products/jdk/1.2/docs/guide/collections/index.html">
+ * Collections framework</a> written by Josh Bloch and introduced in JDK 1.2.
+ * <h2>2. Introduction</h2>
+ * <p>Lists are fundamental to virtually any application. Large scale resizable lists
+ * are, for example, used in scientific computations, simulations, database management
+ * systems, to name just a few.</p>
+ * <h2></h2>
+ * <p>A list is a container holding elements that can be accessed via zero-based
+ * indexes. Lists may be implemented in different ways (most commonly with arrays).
+ * A resizable list automatically grows as elements are added. The lists of this
+ * package do not automatically shrink. Shrinking needs to be triggered by explicitly
+ * calling <tt>trimToSize()</tt> methods.</p>
+ * <p><i>Growing policy</i>: A list implemented with arrays initially has a certain
+ * <tt>initialCapacity</tt> - per default 10 elements, but customizable upon instance
+ * construction. As elements are added, this capacity may no longer be sufficient.
+ * When a list is automatically grown, its capacity is expanded to <tt>1.5*currentCapacity</tt>.
+ * Thus, excessive resizing (involving copying) is avoided.</p>
+ * <h4>Copying</h4>
+ * <p>
+ * <p>Any list can be copied. A copy is <i>equal</i> to the original but entirely
+ * independent of the original. So changes in the copy are not reflected in the
+ * original, and vice-versa.
+ * <h2>3. Organization of this package</h2>
+ * <p>Class naming follows the schema <tt><ElementType><ImplementationTechnique>List</tt>.
+ * For example, we have a {@link org.apache.mahout.math.list.DoubleArrayList}, which is a list
+ * holding <tt>double</tt> elements implemented with <tt>double</tt>[] arrays.
+ * </p>
+ * <p>The classes for lists of a given value type are derived from a common abstract
+ * base class tagged <tt>Abstract<ElementType></tt><tt>List</tt>. For example,
+ * all lists operating on <tt>double</tt> elements are derived from
+ * {@link org.apache.mahout.math.list.AbstractDoubleList},
+ * which in turn is derived from an abstract base class tying together all lists
+ * regardless of value type, {@link org.apache.mahout.math.list.AbstractList}. The abstract
+ * base classes provide skeleton implementations for all but few methods. Experimental
+ * data layouts (such as compressed, sparse, linked, etc.) can easily be implemented
+ * and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree
+ * view</a> to get the broad picture.</p>
+ * <h2>4. Example usage</h2>
+ * <p>The following snippet fills a list, randomizes it, extracts the first half
+ * of the elements, sums them up and prints the result. It is implemented entirely
+ * with accessor methods.</p>
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>AbstractDoubleList list = new DoubleArrayList();
+ * for (int i=0; i<s; i++) { list.add((double)i); }
+ * list.shuffle();
+ * AbstractDoubleList part = list.partFromTo(0,list.size()/2 - 1);
+ * double sum = 0.0;
+ * for (int i=0; i<part.size(); i++) { sum += part.get(i); }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p> For efficiency, all classes provide back doors to enable getting/setting the
+ * backing array directly. In this way, the high level operations of these classes
+ * can be used where appropriate, and one can switch to <tt>[]</tt>-array index
+ * notations where necessary. The key methods for this are <tt>public <ElementType>[]
+ * elements()</tt> and <tt>public void elements(<ElementType>[])</tt>. The
+ * former trustingly returns the array it internally keeps to store the elements.
+ * Holding this array in hand, we can use the <tt>[]</tt>-array operator to
+ * perform iteration over large lists without needing to copy the array or paying
+ * the performance penalty introduced by accessor methods. Alternatively any JAL
+ * algorithm (or other algorithm) can operate on the returned primitive array.
+ * The latter method forces a list to internally hold a user provided array. Using
+ * this approach one can avoid needing to copy the elements into the list.
+ * <p>As a consequence, operations on primitive arrays, Colt lists and JAL algorithms
+ * can freely be mixed at zero-copy overhead.
+ * <p> Note that such special treatment certainly breaks encapsulation. This functionality
+ * is provided for performance reasons only and should only be used when absolutely
+ * necessary. Here is the above example in mixed notation:
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>DoubleArrayList list = new DoubleArrayList(s); // list.size()==0, capacity==s
+ * list.setSize(s); // list.size()==s<br>double[] values = list.elements(); // zero copy, values.length==s<br>for (int i=0; i<s; i++) { values[i]=(double)i; }
+ * list.shuffle();
+ * double sum = 0.0;
+ * int limit = values.length/2;
+ * for (int i=0; i<limit; i++) { sum += values[i]; }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p> Or even more compact using lists as algorithm objects:
+ * <table>
+ * <td class="PRE">
+ * <pre>
+ * int s = 1000000;<br>double[] values = new double[s];
+ * for (int i=0; i<s; i++) { values[i]=(double)i; }
+ * new DoubleArrayList(values).shuffle(); // zero-copy, shuffle via back door
+ * double sum = 0.0;
+ * int limit = values.length/2;
+ * for (int i=0; i<limit; i++) { sum += values[i]; }
+ * log.info(sum);
+ * </pre>
+ * </td>
+ * </table>
+ * <p>
+ * <h2>5. Notes </h2>
+ * <p>The quicksorts and mergesorts are the JDK 1.2 V1.26 algorithms, modified as
+ * necessary to operate on the given data types.
+ * </BODY>
+ * </HTML>
+ */
+package org.apache.mahout.math.list;
\ No newline at end of file
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java?rev=1455748&r1=1455747&r2=1455748&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java Tue Mar 12 23:10:14 2013
@@ -35,7 +35,7 @@ import java.util.Arrays;
* Memory requirements: 1 KB static memory.
*
*/
-public class PrimeFinder {
+public final class PrimeFinder {
/** The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>. */
public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java?rev=1455748&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java Tue Mar 12 23:10:14 2013
@@ -0,0 +1,248 @@
+/**
+ * <HTML>
+ * <BODY>
+ * Automatically growing and shrinking maps holding objects or primitive
+ * data types such as <tt>int</tt>, <tt>double</tt>, etc. Currently all maps are
+ * based upon hashing.
+ * <h2><a name="Overview"></a>1. Overview</h2>
+ * <p>The map package offers flexible object oriented abstractions modelling automatically
+ * resizing maps. It is designed to be scalable in terms of performance and memory
+ * requirements.</p>
+ * <p>Features include: </p>
+ * <p></p>
+ * <ul>
+ * <li>Maps operating on objects as well as all primitive data types such as <code>int</code>,
+ * <code>double</code>, etc.
+ * </li>
+ * <li>Compact representations</li>
+ * <li>Support for quick access to associations</li>
+ * <li>A number of general purpose map operations</li>
+ * </ul>
+ * <p>File-based I/O can be achieved through the standard Java built-in serialization
+ * mechanism. All classes implement the {@link java.io.Serializable} interface.
+ * However, the toolkit is entirely decoupled from advanced I/O. It provides data
+ * structures and algorithms only.
+ * <p> This toolkit borrows some terminology from the Javasoft <a
+ * href="http://www.javasoft.com/products/jdk/1.2/docs/guide/collections/index.html">
+ * Collections framework</a> written by Josh Bloch and introduced in JDK 1.2.
+ * <h2>2. Introduction</h2>
+ * <p>A map is an associative container that manages a set of (key,value) pairs.
+ * It is useful for implementing a collection of one-to-one mappings. A (key,value)
+ * pair is called an <i>association</i>. A value can be looked up via its key.
+ * Associations can quickly be set, removed and retrieved. They are stored in a
+ * hashing structure based on the hash code of their keys, which is obtained by
+ * using a hash function. </p>
+ * <p> A map can, for example, contain <tt>Name-->Location</tt> associations like
+ * <tt>{("Pete", "Geneva"), ("Steve", "Paris"), ("Robert", "New York")}</tt> used
+ * in address books or <tt>Index-->Value</tt> mappings like <tt>{(0, 100), (3,
+ * 1000), (100000, 70)}</tt> representing sparse lists or matrices. For example
+ * this could mean at index 0 we have a value of 100, at index 3 we have a value
+ * of 1000, at index 100000 we have a value of 70, and at all other indexes we
+ * have a value of, say, zero. Another example is a map of IP addresses to domain
+ * names (DNS). Maps can also be useful to represent <i>multi sets</i>, that is,
+ * sets where elements can occur more than once. For multi sets one would have
+ * <tt>Value-->Frequency</tt> mappings like <tt>{(100, 1), (50, 1000), (101, 3)}</tt>
+ * meaning element 100 occurs 1 time, element 50 occurs 1000 times, element 101
+ * occurs 3 times. Further, maps can also manage <tt>ObjectIdentifier-->Object</tt>
+ * mappings like <tt>{(12, obj1), (7, obj2), (10000, obj3), (9, obj4)}</tt> used
+ * in Object Databases.
+ * <p> A map cannot contain two or more <i>equal</i> keys; a key can map to at most
+ * one value. However, more than one key can map to identical values. For primitive
+ * data types "equality" of keys is defined as identity (operator <tt>==</tt>).
+ * For maps using <tt>Object</tt> keys, the meaning of "equality" can be specified
+ * by the user upon instance construction. It can either be defined to be identity
+ * (operator <tt>==</tt>) or to be given by the method {@link java.lang.Object#equals(Object)}.
+ * Associations of kind <tt>(AnyType,Object)</tt> can be of the form <tt>(AnyKey,null)
+ * </tt>, i.e. values can be <tt>null</tt>.
+ * <p> The classes of this package make no guarantees as to the order of the elements
+ * returned by iterators; in particular, they do not guarantee that the order will
+ * remain constant over time.
+ * <h2></h2>
+ * <h4>Copying</h4>
+ * <p>
+ * <p>Any map can be copied. A copy is <i>equal</i> to the original but entirely
+ * independent of the original. So changes in the copy are not reflected in the
+ * original, and vice-versa.
+ * <h2>3. Package organization</h2>
+ * <p>For most primitive data types and for objects there exists a separate map version.
+ * All versions are just the same, except that they operate on different data types.
+ * Colt includes two kinds of implementations for maps: The two different implementations
+ * are tagged <b>Chained</b> and <b>Open</b>.
+ * Note: Chained is no longer included. Wherever it is mentioned, it is of historic interest only.</p>
+ * <ul>
+ * <li><b>Chained</b> uses extendible separate chaining with chains holding unsorted
+ * dynamically linked collision lists.
+ * <li><b>Open</b> uses extendible open addressing with double hashing.
+ * </ul>
+ * <p>Class naming follows the schema <tt><Implementation><KeyType><ValueType>HashMap</tt>.
+ * For example, a {@link org.apache.mahout.math.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
+ * associations and is implemented with open addressing. A {@link org.apache.mahout.math.map.OpenIntObjectHashMap}
+ * holds <tt>(int-->Object)</tt> associations and is implemented with open addressing.
+ * </p>
+ * <p>The classes for maps of a given (key,value) type are derived from a common
+ * abstract base class tagged <tt>Abstract<KeyType><ValueType></tt><tt>Map</tt>.
+ * For example, all maps operating on <tt>(int-->double)</tt> associations are
+ * derived from {@link org.apache.mahout.math.map.AbstractIntDoubleMap}, which in turn is derived
+ * from an abstract base class tying together all maps regardless of association
+ * type, {@link org.apache.mahout.math.set.AbstractSet}. The abstract base classes provide skeleton
+ * implementations for all but few methods. Experimental layouts (such as chaining,
+ * open addressing, extensible hashing, red-black-trees, etc.) can easily be implemented
+ * and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree
+ * view</a> to get the broad picture.</p>
+ * <h2>4. Example usage</h2>
+ * <TABLE>
+ * <TD CLASS="PRE">
+ * <PRE>
+ * int[] keys = {0 , 3 , 100000, 9 };
+ * double[] values = {100.0, 1000.0, 70.0 , 71.0};
+ * AbstractIntDoubleMap map = new OpenIntDoubleHashMap();
+ * // add several associations
+ * for (int i=0; i < keys.length; i++) map.put(keys[i], values[i]);
+ * log.info("map="+map);
+ * log.info("size="+map.size());
+ * log.info(map.containsKey(3));
+ * log.info("get(3)="+map.get(3));
+ * log.info(map.containsKey(4));
+ * log.info("get(4)="+map.get(4));
+ * log.info(map.containsValue(71.0));
+ * log.info("keyOf(71.0)="+map.keyOf(71.0));
+ * // remove one association
+ * map.removeKey(3);
+ * log.info("\nmap="+map);
+ * log.info(map.containsKey(3));
+ * log.info("get(3)="+map.get(3));
+ * log.info(map.containsValue(1000.0));
+ * log.info("keyOf(1000.0)="+map.keyOf(1000.0));
+ * // clear
+ * map.clear();
+ * log.info("\nmap="+map);
+ * log.info("size="+map.size());
+ * </PRE>
+ * </TD>
+ * </TABLE>
+ * yields the following output
+ * <TABLE>
+ * <TD CLASS="PRE">
+ * <PRE>
+ * map=[0->100.0, 3->1000.0, 9->71.0, 100000->70.0]
+ * size=4
+ * true
+ * get(3)=1000.0
+ * false
+ * get(4)=0.0
+ * true
+ * keyOf(71.0)=9
+ * map=[0->100.0, 9->71.0, 100000->70.0]
+ * false
+ * get(3)=0.0
+ * false
+ * keyOf(1000.0)=-2147483648
+ * map=[]
+ * size=0
+ * </PRE>
+ * </TD>
+ * </TABLE>
+ * <h2> 5. Notes </h2>
+ * <p>
+ * Note that implementations are not synchronized.
+ * <p>
+ * Choosing efficient parameters for hash maps is not always easy.
+ * However, since parameters determine efficiency and memory requirements, here is a quick guide on how to choose them.
+ * If your use case does not heavily operate on hash maps but uses them just because they provide
+ * convenient functionality, you can safely skip this section.
+ * For those of you who care, read on.
+ * <p>
+ * There are three parameters that can be customized upon map construction: <tt>initialCapacity</tt>,
+ * <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * The more memory one can afford, the faster a hash map.
+ * The hash map's capacity is the maximum number of associations that can be added without needing to allocate new
+ * internal memory.
+ * A larger capacity means faster adding, searching and removing.
+ * The <tt>initialCapacity</tt> corresponds to the capacity used upon instance construction.
+ * <p>
+ * The <tt>loadFactor</tt> of a hash map measures the degree of "fullness".
+ * It is given by the number of associations (<tt>size()</tt>)
+ * divided by the hash map capacity <tt>(0.0 <= loadFactor <= 1.0)</tt>.
+ * The more associations are added, the larger the loadFactor and the more hash map performance degrades.
+ * Therefore, when the loadFactor exceeds a customizable threshold (<tt>maxLoadFactor</tt>), the hash map is
+ * automatically grown.
+ * In such a way performance degradation can be avoided.
+ * Similarly, when the loadFactor falls below a customizable threshold (<tt>minLoadFactor</tt>), the hash map is
+ * automatically shrunk.
+ * In such a way excessive memory consumption can be avoided.
+ * Automatic resizing (both growing and shrinking) obeys the following invariant:
+ * <p>
+ * <tt>capacity * minLoadFactor <= size() <= capacity * maxLoadFactor</tt>
+ * <p> The term <tt>capacity * minLoadFactor</tt> is called the <i>low water mark</i>,
+ * <tt>capacity * maxLoadFactor</tt> is called the <i>high water mark</i>. In other
+ * words, the number of associations may vary within the water mark constraints.
+ * When it goes out of range, the map is automatically resized and memory consumption
+ * changes proportionally.
+ * <ul>
+ * <li>To tune for memory at the expense of performance, both increase <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * <li>To tune for performance at the expense of memory, both decrease <tt>minLoadFactor</tt> and <tt>maxLoadFactor</tt>.
+ * As a special case, set <tt>minLoadFactor=0</tt> to avoid any automatic shrinking.
+ * </ul>
+ * Resizing large hash maps can be time consuming, <tt>O(size())</tt>, and should be avoided if possible (maintaining
+ * primes is not the reason).
+ * Unnecessary growing operations can be avoided if the number of associations is known before they are added, or can be
+ * estimated.<p>
+ * In such a case good parameters are as follows:
+ * <p>
+ * <i>For chaining:</i>
+ * <br>Set the <tt>initialCapacity = 1.4*expectedSize</tt> or greater.
+ * <br>Set the <tt>maxLoadFactor = 0.8</tt> or greater.
+ * <p>
+ * <i>For open addressing:</i>
+ * <br>Set the <tt>initialCapacity = 2*expectedSize</tt> or greater. Alternatively call <tt>ensureCapacity(...)</tt>.
+ * <br>Set the <tt>maxLoadFactor = 0.5</tt>.
+ * <br>Never set <tt>maxLoadFactor > 0.55</tt>; open addressing exponentially slows down beyond that point.
+ * <p>
+ * In this way the hash map will never need to grow and still stay fast.
+ * It is never a good idea to set <tt>maxLoadFactor < 0.1</tt>,
+ * because the hash map would grow too often.
+ * If it is entirely unknown how many associations the application will use,
+ * the default constructor should be used. The map will grow and shrink as needed.
+ * <p>
+ * <b>Comparison of chaining and open addressing</b>
+ * <p> Chaining is faster than open addressing, when assuming unconstrained memory
+ * consumption. Open addressing is more space efficient than chaining, because
+ * it does not create entry objects but uses primitive arrays which are considerably
+ * smaller. Entry objects consume significant amounts of memory compared to the
+ * information they actually hold. Open addressing also poses no problems to the
+ * garbage collector. In contrast, chaining can create millions of entry objects
+ * which are linked; a nightmare for any garbage collector. In addition, entry
+ * object creation is a bit slow. <br>
+ * Therefore, with the same amount of memory, or even less memory, hash maps with
+ * larger capacity can be maintained under open addressing, which yields smaller
+ * loadFactors, which in turn keeps performance competitive with chaining. In our
+ * benchmarks, using significantly less memory, open addressing usually is not
+ * more than 1.2-1.5 times slower than chaining.
+ * <p><b>Further readings</b>:
+ * <br>Knuth D., The Art of Computer Programming: Searching and Sorting, 3rd ed.
+ * <br>Griswold W., Townsend G., The Design and Implementation of Dynamic Hashing for Sets and Tables in Icon, Software -
+ * Practice and Experience, Vol. 23(4), 351-367 (April 1993).
+ * <br>Larson P., Dynamic hash tables, Comm. of the ACM, 31, (4), 1988.
+ * <p>
+ * <b>Performance:</b>
+ * <p>
+ * Time complexity:
+ * <br>The classes offer <i>expected</i> time complexity <tt>O(1)</tt> (i.e. constant time) for the basic operations
+ * <tt>put</tt>, <tt>get</tt>, <tt>removeKey</tt>, <tt>containsKey</tt> and <tt>size</tt>,
+ * assuming the hash function disperses the elements properly among the buckets.
+ * Otherwise, pathological cases, although highly improbable, can occur, degrading performance to <tt>O(N)</tt> in the
+ * worst case.
+ * Operations <tt>containsValue</tt> and <tt>keyOf</tt> are <tt>O(N)</tt>.
+ * <p>
+ * Memory requirements for <i>open addressing</i>:
+ * <br>worst case: <tt>memory [bytes] = (1/minLoadFactor) * size() * (1 + sizeOf(key) + sizeOf(value))</tt>.
+ * <br>best case: <tt>memory [bytes] = (1/maxLoadFactor) * size() * (1 + sizeOf(key) + sizeOf(value))</tt>.
+ * Where <tt>sizeOf(int) = 4</tt>, <tt>sizeOf(double) = 8</tt>, <tt>sizeOf(Object) = 4</tt>, etc.
+ * Thus, an <tt>OpenIntIntHashMap</tt> with minLoadFactor=0.25 and maxLoadFactor=0.5 and 1000000 associations uses
+ * between 17 MB and 34 MB.
+ * The same map with 1000 associations uses between 17 and 34 KB.
+ * <p>
+ * </BODY>
+ * </HTML>
+ */
+package org.apache.mahout.math.map;
\ No newline at end of file