You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/27 20:02:23 UTC
svn commit: r948935 [2/3] - in /mahout/trunk: buildtools/src/main/resources/
core/src/main/java/org/apache/mahout/cf/taste/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/m...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Thu May 27 18:02:20 2010
@@ -483,7 +483,7 @@ public final class TreeClusteringRecomme
return "TreeClusteringRecommender2[clusterSimilarity:" + clusterSimilarity + ']';
}
- private class Estimator implements TopItems.Estimator<Long> {
+ private final class Estimator implements TopItems.Estimator<Long> {
private final FastIDSet cluster;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Thu May 27 18:02:20 2010
@@ -19,11 +19,11 @@ package org.apache.mahout.cf.taste.impl.
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.jdbc.AbstractJDBCDataModel;
-import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
/**
* <p>
- * MySQL-specific implementation. Should be used in conjunction with a {@link MySQLJDBCDataModel}. This
+ * MySQL-specific implementation. Should be used in conjunction with a
+ * {@link org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel}. This
* implementation stores item-item diffs in a MySQL database and encapsulates some other slope-one-specific
* operations that are needed on the preference data in the database. It assumes the database has a schema
* like:
@@ -170,7 +170,7 @@ public final class MySQLJDBCDiffStorage
}
/**
- * @see MySQLJDBCDataModel#getFetchSize()
+ * @see org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel#getFetchSize()
*/
@Override
protected int getFetchSize() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Thu May 27 18:02:20 2010
@@ -40,15 +40,14 @@ import org.apache.mahout.cf.taste.model.
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
- * A {@link Recommender} which uses Single Value Decomposition to find the main features of the data set.
- * Thanks to Simon Funk for the hints in the implementation.
+ * A {@link org.apache.mahout.cf.taste.recommender.Recommender} which uses Singular Value Decomposition
+ * to find the main features of the data set. Thanks to Simon Funk for the hints in the implementation.
*/
public final class SVDRecommender extends AbstractRecommender {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java Thu May 27 18:02:20 2010
@@ -21,11 +21,11 @@ import javax.sql.DataSource;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.jdbc.AbstractJDBCComponent;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
/**
* <p>
- * An {@link ItemSimilarity} backed by a MySQL database and accessed via JDBC. It may work with other JDBC
+ * An {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} backed by a MySQL database
+ * and accessed via JDBC. It may work with other JDBC
* databases. By default, this class assumes that there is a {@link DataSource} available under the JNDI name
* "jdbc/taste", which gives access to a database with a "taste_item_similarity" table with the following
* schema:
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java Thu May 27 18:02:20 2010
@@ -17,10 +17,6 @@
package org.apache.mahout.cf.taste.model;
-import java.util.List;
-
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
@@ -45,7 +41,7 @@ public interface DataModel extends Refre
* @param userID
* ID of user to get prefs for
* @return user's preferences, ordered by item ID
- * @throws NoSuchUserException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
* if the user does not exist
* @throws TasteException
* if an error occurs while accessing the data
@@ -56,7 +52,7 @@ public interface DataModel extends Refre
* @param userID
* ID of user to get prefs for
* @return IDs of items user expresses a preference for
- * @throws NoSuchUserException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
* if the user does not exist
* @throws TasteException
* if an error occurs while accessing the data
@@ -64,7 +60,7 @@ public interface DataModel extends Refre
FastIDSet getItemIDsFromUser(long userID) throws TasteException;
/**
- * @return a {@link List} of all item IDs in the model, in order
+ * @return a {@link LongPrimitiveIterator} of all item IDs in the model, in order
* @throws TasteException
* if an error occurs while accessing the data
*/
@@ -74,7 +70,7 @@ public interface DataModel extends Refre
* @param itemID
* item ID
* @return all existing {@link Preference}s expressed for that item, ordered by user ID, as an array
- * @throws NoSuchItemException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
* if the item does not exist
* @throws TasteException
* if an error occurs while accessing the data
@@ -89,7 +85,7 @@ public interface DataModel extends Refre
* @param itemID
* item ID to get pref value for
* @return preference value from the given user for the given item or null if none exists
- * @throws NoSuchUserException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
* if the user does not exist
* @throws TasteException
* if an error occurs while accessing the data
@@ -120,7 +116,7 @@ public interface DataModel extends Refre
* @throws IllegalArgumentException
* if itemIDs is null, empty, or larger than 2 elements since currently only queries of up to 2
* items are needed and supported
- * @throws NoSuchItemException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
* if an item does not exist
*/
int getNumUsersWithPreferenceFor(long... itemIDs) throws TasteException;
@@ -136,9 +132,9 @@ public interface DataModel extends Refre
* item to set preference for
* @param value
* preference value
- * @throws NoSuchItemException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
* if the item does not exist
- * @throws NoSuchUserException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
* if the user does not exist
* @throws TasteException
* if an error occurs while accessing the data
@@ -154,9 +150,9 @@ public interface DataModel extends Refre
* user from which to remove preference
* @param itemID
* item to remove preference for
- * @throws NoSuchItemException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
* if the item does not exist
- * @throws NoSuchUserException
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
* if the user does not exist
* @throws TasteException
* if an error occurs while accessing the data
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Thu May 27 18:02:20 2010
@@ -32,7 +32,6 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.mahout.classifier.BayesFileFormatter;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.iterator.ArrayIterator;
@@ -43,7 +42,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Reads the input train set(preprocessed using the {@link BayesFileFormatter}).
+ * Reads the input training set (preprocessed using the {@link org.apache.mahout.classifier.BayesFileFormatter}).
*/
public class BayesFeatureMapper extends MapReduceBase implements Mapper<Text,Text,StringTuple,DoubleWritable> {
@@ -101,7 +100,7 @@ public class BayesFeatureMapper extends
}
}
}
- final MutableDouble lengthNormalisationMut = new MutableDouble(0);
+ final MutableDouble lengthNormalisationMut = new MutableDouble(0.0);
wordList.forEachPair(new ObjectIntProcedure<String>() {
@Override
public boolean apply(String word, int dKJ) {
@@ -181,7 +180,7 @@ public class BayesFeatureMapper extends
public IteratorTokenStream(Iterator<String> iterator) {
this.iterator = iterator;
- this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.termAtt = addAttribute(TermAttribute.class);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Thu May 27 18:02:20 2010
@@ -25,7 +25,6 @@ import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
-import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu May 27 18:02:20 2010
@@ -25,8 +25,6 @@ import org.apache.commons.cli2.CommandLi
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configurable;
@@ -55,7 +53,7 @@ import org.apache.mahout.math.VectorWrit
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class DirichletDriver {
+public final class DirichletDriver {
public static final String STATE_IN_KEY = "org.apache.mahout.clustering.dirichlet.stateIn";
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Thu May 27 18:02:20 2010
@@ -25,8 +25,6 @@ import org.apache.commons.cli2.CommandLi
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configuration;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Thu May 27 18:02:20 2010
@@ -34,7 +34,7 @@ import org.apache.mahout.clustering.kmea
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-class FuzzyKMeansUtil {
+final class FuzzyKMeansUtil {
private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansUtil.class);
private FuzzyKMeansUtil() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Thu May 27 18:02:20 2010
@@ -117,7 +117,7 @@ public class KMeansClusterer {
boolean converged = false;
int iteration = 0;
while (!converged && iteration < maxIter) {
- System.out.println("iteration: " + iteration);
+ //System.out.println("iteration: " + iteration);
List<Cluster> next = new ArrayList<Cluster>();
List<Cluster> cs = clustersList.get(iteration++);
for (Cluster c : cs) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Thu May 27 18:02:20 2010
@@ -22,8 +22,6 @@ import org.apache.commons.cli2.CommandLi
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileStatus;
@@ -43,7 +41,6 @@ import org.apache.mahout.clustering.Weig
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java Thu May 27 18:02:20 2010
@@ -39,8 +39,9 @@ public class LDAMapper extends Mapper<Wr
private LDAInference infer;
@Override
- public void map(WritableComparable<?> key, VectorWritable wordCountsWritable, Context context) throws IOException,
- InterruptedException {
+ protected void map(WritableComparable<?> key,
+ VectorWritable wordCountsWritable,
+ Context context) throws IOException, InterruptedException {
Vector wordCounts = wordCountsWritable.get();
LDAInference.InferredDocument doc = null;
try {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Thu May 27 18:02:20 2010
@@ -23,7 +23,6 @@ import java.io.IOException;
import java.lang.reflect.Type;
import org.apache.mahout.clustering.ClusterBase;
-import org.apache.mahout.math.CardinalityException;
import org.apache.mahout.math.JsonVectorAdapter;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
@@ -109,7 +108,7 @@ public class MeanShiftCanopy extends Clu
* a Vector to add
* @param nPoints
* the number of times to add the point
- * @throws CardinalityException
+ * @throws org.apache.mahout.math.CardinalityException
* if the cardinalities disagree
*/
void addPoints(Vector point, int nPoints) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Thu May 27 18:02:20 2010
@@ -198,7 +198,7 @@ public class MeanShiftCanopyClusterer {
clusterer.mergeCanopy(canopy, migratedCanopies);
}
canopies = migratedCanopies;
- System.out.println(iter++);
+ //System.out.println(iter++);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Thu May 27 18:02:20 2010
@@ -23,8 +23,6 @@ import org.apache.commons.cli2.CommandLi
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configurable;
@@ -45,7 +43,6 @@ import org.apache.mahout.clustering.fuzz
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IntPairWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntPairWritable.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IntPairWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IntPairWritable.java Thu May 27 18:02:20 2010
@@ -32,7 +32,7 @@ import org.apache.hadoop.io.WritableComp
*/
public final class IntPairWritable
extends BinaryComparable
- implements WritableComparable<BinaryComparable>, Serializable {
+ implements WritableComparable<BinaryComparable>, Serializable, Cloneable {
static final int INT_BYTE_LENGTH = 4;
static final int INT_PAIR_BYTE_LENGTH = 2 * INT_BYTE_LENGTH;
@@ -130,7 +130,7 @@ public final class IntPairWritable
private static void putInt(int value, byte[] b, int offset) {
for (int i = offset, j = 24; j >= 0; i++, j -= 8) {
- b[i] = (byte) (value >>> j);
+ b[i] = (byte) (value >> j);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Thu May 27 18:02:20 2010
@@ -88,7 +88,7 @@ public final class TimingStatistics impl
return new Call();
}
- public class Call {
+ public final class Call {
private final long startTime = System.nanoTime();
private Call() { }
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java Thu May 27 18:02:20 2010
@@ -105,8 +105,7 @@ public class LFUCache<K,V> implements Ca
if (dataMap.containsKey(key)) {
return;
}
- if (capacity == dataMap.size()) // Cache Full
- {
+ if (capacity == dataMap.size()) { // Cache Full
removeLeastFrequent();
}
MutableLong count = new MutableLong(1L);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Thu May 27 18:02:20 2010
@@ -33,7 +33,8 @@ public final class DefaultOptionCreator
* Returns a default command line option for help. Used by all clustering jobs and many others
* */
public static Option helpOption() {
- return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help").withShortName("h").create();
+ return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help")
+ .withShortName("h").create();
}
/**
@@ -51,7 +52,8 @@ public final class DefaultOptionCreator
public static DefaultOptionBuilder clustersInOption() {
return new DefaultOptionBuilder().withLongName("clusters").withRequired(true).withArgument(
new ArgumentBuilder().withName("clusters").withMinimum(1).withMaximum(1).create()).withDescription(
- "The path to the initial clusters directory. Must be a SequenceFile of some type of Cluster").withShortName("c");
+ "The path to the initial clusters directory. Must be a SequenceFile of some type of Cluster")
+ .withShortName("c");
}
/**
@@ -72,12 +74,15 @@ public final class DefaultOptionCreator
}
/**
- * Returns a default command line option for specification of distance measure class to use. Used by Canopy, FuzzyKmeans, Kmeans, MeanShift
+ * Returns a default command line option for specification of distance measure class to use.
+ * Used by Canopy, FuzzyKmeans, Kmeans, MeanShift
*/
public static DefaultOptionBuilder distanceMeasureOption() {
- return new DefaultOptionBuilder().withLongName("distanceMeasure").withRequired(false).withShortName("dm").withArgument(
- new ArgumentBuilder().withName("distanceMeasure").withDefault(SquaredEuclideanDistanceMeasure.class.getName()).withMinimum(
- 1).withMaximum(1).create()).withDescription("The classname of the DistanceMeasure. Default is SquaredEuclidean");
+ return new DefaultOptionBuilder().withLongName("distanceMeasure").withRequired(false).withShortName("dm")
+ .withArgument(new ArgumentBuilder().withName("distanceMeasure")
+ .withDefault(SquaredEuclideanDistanceMeasure.class.getName())
+ .withMinimum(1).withMaximum(1).create())
+ .withDescription("The classname of the DistanceMeasure. Default is SquaredEuclidean");
}
/**
@@ -85,8 +90,8 @@ public final class DefaultOptionCreator
*/
public static DefaultOptionBuilder t1Option() {
return new DefaultOptionBuilder().withLongName("t1").withRequired(true).withArgument(
- new ArgumentBuilder().withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("T1 threshold value")
- .withShortName("t1");
+ new ArgumentBuilder().withName("t1").withMinimum(1).withMaximum(1).create())
+ .withDescription("T1 threshold value").withShortName("t1");
}
/**
@@ -94,22 +99,25 @@ public final class DefaultOptionCreator
*/
public static DefaultOptionBuilder t2Option() {
return new DefaultOptionBuilder().withLongName("t2").withRequired(true).withArgument(
- new ArgumentBuilder().withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("T2 threshold value")
+ new ArgumentBuilder().withName("t2").withMinimum(1).withMaximum(1).create())
+ .withDescription("T2 threshold value")
.withShortName("t2");
}
/**
- * Returns a default command line option for specification of max number of iterations. Used by Dirichlet, FuzzyKmeans, Kmeans, LDA
+ * Returns a default command line option for specification of max number of iterations.
+ * Used by Dirichlet, FuzzyKmeans, Kmeans, LDA
*/
public static DefaultOptionBuilder maxIterationsOption() {
// default value used by LDA which overrides withRequired(false)
return new DefaultOptionBuilder().withLongName("maxIter").withRequired(true).withShortName("x").withArgument(
- new ArgumentBuilder().withName("maxIter").withDefault("-1").withMinimum(1).withMaximum(1).create()).withDescription(
- "The maximum number of iterations.");
+ new ArgumentBuilder().withName("maxIter").withDefault("-1").withMinimum(1).withMaximum(1).create())
+ .withDescription("The maximum number of iterations.");
}
/**
- * Returns a default command line option for specification of numbers of clusters to create. Used by Dirichlet, FuzzyKmeans, Kmeans
+ * Returns a default command line option for specification of numbers of clusters to create.
+ * Used by Dirichlet, FuzzyKmeans, Kmeans
*/
public static DefaultOptionBuilder kOption() {
return new DefaultOptionBuilder().withLongName("k").withRequired(false).withArgument(
@@ -118,11 +126,13 @@ public final class DefaultOptionCreator
}
/**
- * Returns a default command line option for convergence delta specification. Used by FuzzyKmeans, Kmeans, MeanShift
+ * Returns a default command line option for convergence delta specification.
+ * Used by FuzzyKmeans, Kmeans, MeanShift
*/
public static DefaultOptionBuilder convergenceOption() {
- return new DefaultOptionBuilder().withLongName("convergenceDelta").withRequired(false).withShortName("cd").withArgument(
- new ArgumentBuilder().withName("convergenceDelta").withDefault("0.5").withMinimum(1).withMaximum(1).create())
+ return new DefaultOptionBuilder().withLongName("convergenceDelta").withRequired(false)
+ .withShortName("cd").withArgument(new ArgumentBuilder().withName("convergenceDelta")
+ .withDefault("0.5").withMinimum(1).withMaximum(1).create())
.withDescription("The convergence delta value. Default is 0.5");
}
@@ -131,27 +141,29 @@ public final class DefaultOptionCreator
*/
public static DefaultOptionBuilder alphaOption() {
return new DefaultOptionBuilder().withLongName("alpha").withRequired(false).withShortName("m").withArgument(
- new ArgumentBuilder().withName("alpha").withDefault("1.0").withMinimum(1).withMaximum(1).create()).withDescription(
- "The alpha0 value for the DirichletDistribution. Defaults to 1.0");
+ new ArgumentBuilder().withName("alpha").withDefault("1.0").withMinimum(1).withMaximum(1).create())
+ .withDescription("The alpha0 value for the DirichletDistribution. Defaults to 1.0");
}
/**
* Returns a default command line option for model distribution class specification. Used by Dirichlet
*/
public static DefaultOptionBuilder modelDistributionOption() {
- return new DefaultOptionBuilder().withLongName("modelDistClass").withRequired(false).withShortName("md").withArgument(
- new ArgumentBuilder().withName("modelDistClass").withDefault(NormalModelDistribution.class.getName()).withMinimum(1)
- .withMaximum(1).create()).withDescription("The ModelDistribution class name. " + "Defaults to NormalModelDistribution");
+ return new DefaultOptionBuilder().withLongName("modelDistClass").withRequired(false).withShortName("md")
+ .withArgument(new ArgumentBuilder().withName("modelDistClass")
+ .withDefault(NormalModelDistribution.class.getName()).withMinimum(1).withMaximum(1).create())
+ .withDescription("The ModelDistribution class name. " + "Defaults to NormalModelDistribution");
}
/**
* Returns a default command line option for model prototype class specification. Used by Dirichlet
*/
public static DefaultOptionBuilder modelPrototypeOption() {
- return new DefaultOptionBuilder().withLongName("modelPrototypeClass").withRequired(false).withShortName("mp").withArgument(
- new ArgumentBuilder().withName("prototypeClass").withDefault(RandomAccessSparseVector.class.getName()).withMinimum(1)
+ return new DefaultOptionBuilder().withLongName("modelPrototypeClass").withRequired(false).withShortName("mp")
+ .withArgument(new ArgumentBuilder().withName("prototypeClass")
+ .withDefault(RandomAccessSparseVector.class.getName()).withMinimum(1)
.withMaximum(1).create()).withDescription(
- "The ModelDistribution prototype Vector class name. " + "Defaults to RandomAccessSparseVector");
+ "The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector");
}
/**
@@ -159,17 +171,18 @@ public final class DefaultOptionCreator
*/
public static DefaultOptionBuilder numMappersOption() {
return new DefaultOptionBuilder().withLongName("numMap").withRequired(false).withArgument(
- new ArgumentBuilder().withName("numMap").withDefault("10").withMinimum(1).withMaximum(1).create()).withDescription(
- "The number of map tasks. Defaults to 10").withShortName("u");
+ new ArgumentBuilder().withName("numMap").withDefault("10").withMinimum(1).withMaximum(1).create())
+ .withDescription("The number of map tasks. Defaults to 10").withShortName("u");
}
/**
- * Returns a default command line option for specifying the max number of reducers. Used by Dirichlet, FuzzyKmeans, Kmeans and LDA
+ * Returns a default command line option for specifying the max number of reducers.
+ * Used by Dirichlet, FuzzyKmeans, Kmeans and LDA
*/
public static DefaultOptionBuilder numReducersOption() {
return new DefaultOptionBuilder().withLongName("maxRed").withRequired(false).withShortName("r").withArgument(
- new ArgumentBuilder().withName("maxRed").withDefault("2").withMinimum(1).withMaximum(1).create()).withDescription(
- "The number of reduce tasks. Defaults to 2");
+ new ArgumentBuilder().withName("maxRed").withDefault("2").withMinimum(1).withMaximum(1).create())
+ .withDescription("The number of reduce tasks. Defaults to 2");
}
/**
@@ -184,19 +197,22 @@ public final class DefaultOptionCreator
* Returns a default command line option for specifying the emitMostLikely flag. Used by Dirichlet and FuzzyKmeans
*/
public static DefaultOptionBuilder emitMostLikelyOption() {
- return new DefaultOptionBuilder().withLongName("emitMostLikely").withRequired(false).withShortName("e").withArgument(
- new ArgumentBuilder().withName("emitMostLikely").withDefault("true").withMinimum(1).withMaximum(1).create())
- .withDescription(
- "True if clustering should emit the most likely point only, false for threshold clustering. Default is true");
+ return new DefaultOptionBuilder().withLongName("emitMostLikely").withRequired(false).withShortName("e")
+ .withArgument(new ArgumentBuilder().withName("emitMostLikely").withDefault("true")
+ .withMinimum(1).withMaximum(1).create()).withDescription(
+ "True if clustering should emit the most likely point only, " +
+ "false for threshold clustering. Default is true");
}
/**
- * Returns a default command line option for specifying the clustering threshold value. Used by Dirichlet and FuzzyKmeans
+ * Returns a default command line option for specifying the clustering threshold value.
+ * Used by Dirichlet and FuzzyKmeans
*/
public static DefaultOptionBuilder thresholdOption() {
- return new DefaultOptionBuilder().withLongName("threshold").withRequired(false).withShortName("t").withArgument(
- new ArgumentBuilder().withName("threshold").withDefault("0").withMinimum(1).withMaximum(1).create()).withDescription(
- "The pdf threshold used for cluster determination. Default is 0");
+ return new DefaultOptionBuilder().withLongName("threshold").withRequired(false).withShortName("t")
+ .withArgument(new ArgumentBuilder().withName("threshold").withDefault("0")
+ .withMinimum(1).withMaximum(1).create())
+ .withDescription("The pdf threshold used for cluster determination. Default is 0");
}
/**
@@ -209,11 +225,13 @@ public final class DefaultOptionCreator
}
/**
- * Returns a default command line option for specifying that the MeanShift input directory already contains Canopies vs. Vectors
+ * Returns a default command line option for specifying that the MeanShift input directory already
+ * contains Canopies vs. Vectors
*/
public static DefaultOptionBuilder inputIsCanopiesOption() {
- return new DefaultOptionBuilder().withLongName("inputIsCanopies").withRequired(false).withShortName("ic").withArgument(
- new ArgumentBuilder().withName("inputIsCanopies").withMinimum(1).withMaximum(1).create()).withDescription(
+ return new DefaultOptionBuilder().withLongName("inputIsCanopies").withRequired(false)
+ .withShortName("ic").withArgument(new ArgumentBuilder().withName("inputIsCanopies")
+ .withMinimum(1).withMaximum(1).create()).withDescription(
"If present, the input directory already contains MeanShiftCanopies");
}
@@ -232,7 +250,8 @@ public final class DefaultOptionCreator
public static DefaultOptionBuilder numWordsOption() {
return new DefaultOptionBuilder().withLongName("numWords").withRequired(true).withArgument(
new ArgumentBuilder().withName("numWords").withMinimum(1).withMaximum(1).create()).withDescription(
- "The total number of words in the corpus (can be approximate, needs to exceed the actual value)").withShortName("v");
+ "The total number of words in the corpus (can be approximate, needs to exceed the actual value)")
+ .withShortName("v");
}
/**
@@ -240,7 +259,8 @@ public final class DefaultOptionCreator
*/
public static DefaultOptionBuilder topicSmoothingOption() {
return new DefaultOptionBuilder().withLongName("topicSmoothing").withRequired(false).withArgument(
- new ArgumentBuilder().withName("topicSmoothing").withDefault(-1.0).withMinimum(0).withMaximum(1).create()).withDescription(
+ new ArgumentBuilder().withName("topicSmoothing").withDefault(-1.0)
+ .withMinimum(0).withMaximum(1).create()).withDescription(
"Topic smoothing parameter. Default is 50/numTopics.").withShortName("a");
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java Thu May 27 18:02:20 2010
@@ -39,8 +39,8 @@ public class TanimotoDistanceMeasure ext
*/
@Override
public double distance(Vector a, Vector b) {
- double ab = 0;
- double denominator = 0;
+ double ab;
+ double denominator;
if (getWeights() != null) {
ab = dot(b, a); // b is SequentialAccess
denominator = dot(a, a) + dot(b, b) - ab;
@@ -54,9 +54,9 @@ public class TanimotoDistanceMeasure ext
}
if (denominator > 0) {
// denom == 0 only when dot(a,a) == dot(b,b) == dot(a,b) == 0
- return 1 - ab / denominator;
+ return 1.0 - ab / denominator;
} else {
- return 0;
+ return 0.0;
}
}
@@ -65,7 +65,7 @@ public class TanimotoDistanceMeasure ext
Iterator<Vector.Element> it = a.iterateNonZero();
Vector.Element el;
Vector weights = getWeights();
- double dot = 0;
+ double dot = 0.0;
while (it.hasNext() && (el = it.next()) != null) {
double elementValue = el.get();
double value = elementValue * (sameVector ? elementValue : b.getQuick(el.index()));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java Thu May 27 18:02:20 2010
@@ -56,7 +56,7 @@ public class StableFixedSizeSamplingIter
return new DelegateIterator(buf);
}
- private static class Entry<T> implements Comparable<Entry<T>> {
+ private static final class Entry<T> implements Comparable<Entry<T>> {
private final int originalIndex;
private T value;
@@ -88,7 +88,7 @@ public class StableFixedSizeSamplingIter
}
}
- private static class DelegateIterator<T> implements Iterator<T> {
+ private static final class DelegateIterator<T> implements Iterator<T> {
private final Iterator<Entry<T>> iterator;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java Thu May 27 18:02:20 2010
@@ -48,7 +48,7 @@ public interface Parametered extends Job
void createParameters(String prefix, JobConf jobConf);
/** "multiple inheritance" */
- class ParameteredGeneralizations {
+ final class ParameteredGeneralizations {
private ParameteredGeneralizations() { }
public static void configureParameters(Parametered parametered, JobConf jobConf) {
@@ -99,7 +99,7 @@ public interface Parametered extends Job
return new Conf(parametered).toString();
}
- private static class Help {
+ private static final class Help {
private final StringBuilder sb;
@Override
@@ -158,7 +158,7 @@ public interface Parametered extends Job
}
}
- private static class Conf {
+ private static final class Conf {
private final StringBuilder sb;
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java Thu May 27 18:02:20 2010
@@ -32,7 +32,7 @@ import org.apache.mahout.ga.watchmaker.O
/**
* Utility class that contains various helper methods
*/
-public class DFUtils {
+public final class DFUtils {
private DFUtils() { }
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java Thu May 27 18:02:20 2010
@@ -20,7 +20,7 @@ package org.apache.mahout.df;
/**
* various methods to compute from the output of a random forest
*/
-public class ErrorEstimate {
+public final class ErrorEstimate {
private ErrorEstimate() {}
public static double errorRate(int[] labels, int[] predictions) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Thu May 27 18:02:20 2010
@@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory;
* adds an IGNORED first attribute that will contain a unique id for each instance, which is the line number
* of the instance in the input data
*/
-public class DataLoader {
+public final class DataLoader {
private static final Logger log = LoggerFactory.getLogger(DataLoader.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java Thu May 27 18:02:20 2010
@@ -24,7 +24,7 @@ import java.util.Random;
/**
* Helper methods that deals with data lists and arrays of values
*/
-public class DataUtils {
+public final class DataUtils {
private DataUtils() { }
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Thu May 27 18:02:20 2010
@@ -27,7 +27,7 @@ import org.apache.mahout.df.data.Dataset
/**
* Contains various methods that deal with descriptor strings
*/
-public class DescriptorUtils {
+public final class DescriptorUtils {
private DescriptorUtils() { }
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/inmem/InMemInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/inmem/InMemInputFormat.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/inmem/InMemInputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/inmem/InMemInputFormat.java Thu May 27 18:02:20 2010
@@ -240,7 +240,8 @@ public class InMemInputFormat implements
return firstId == split.firstId &&
nbTrees == split.nbTrees &&
- ((seed == null && split.seed == null) || seed.equals(split.seed));
+ ((seed == null && split.seed == null) ||
+ (seed != null && seed.equals(split.seed)));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step1Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step1Mapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step1Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step1Mapper.java Thu May 27 18:02:20 2010
@@ -146,7 +146,8 @@ public class Step1Mapper extends MapredM
}
@Override
- public void map(LongWritable key, Text value, OutputCollector<TreeID,MapredOutput> output, Reporter reporter) throws IOException {
+ public void map(LongWritable key, Text value, OutputCollector<TreeID,MapredOutput> output, Reporter reporter)
+ throws IOException {
if (this.output == null) {
this.output = output;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java Thu May 27 18:02:20 2010
@@ -107,8 +107,9 @@ public class MapredOutput implements Wri
MapredOutput mo = (MapredOutput) obj;
- return ((tree == null && mo.getTree() == null) || tree.equals(mo.getTree())) &&
- Arrays.equals(predictions, mo.getPredictions());
+ return ((tree == null && mo.getTree() == null) ||
+ (tree != null && tree.equals(mo.getTree()))) &&
+ Arrays.equals(predictions, mo.getPredictions());
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java Thu May 27 18:02:20 2010
@@ -81,12 +81,12 @@ public class InMemMapper extends MapredM
}
@Override
- protected void map(IntWritable key, NullWritable value, Context context) throws IOException,
- InterruptedException {
+ protected void map(IntWritable key, NullWritable value, Context context)
+ throws IOException, InterruptedException {
map(key, context);
}
- public void map(IntWritable key, Context context) throws IOException, InterruptedException {
+ protected void map(IntWritable key, Context context) throws IOException, InterruptedException {
SingleTreePredictions callback = null;
int[] predictions = null;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/InterResults.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/InterResults.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/InterResults.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/InterResults.java Thu May 27 18:02:20 2010
@@ -28,7 +28,7 @@ import org.apache.mahout.df.node.Node;
* Stores/Loads the intermediate results of step1 needed by step2.<br>
* This class should not be needed outside of the partial package, so all its methods are protected.<br>
*/
-public class InterResults {
+public final class InterResults {
private InterResults() { }
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java Thu May 27 18:02:20 2010
@@ -44,7 +44,7 @@ import org.slf4j.LoggerFactory;
/**
* Generates a file descriptor for a given dataset
*/
-public class Describe {
+public final class Describe {
private static final Logger log = LoggerFactory.getLogger(Describe.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java Thu May 27 18:02:20 2010
@@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
/**
* Compute the frequency distribution of the "class label"
*/
-public class Frequencies extends Configured implements Tool {
+public final class Frequencies extends Configured implements Tool {
private static final Logger log = LoggerFactory.getLogger(Frequencies.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java Thu May 27 18:02:20 2010
@@ -49,7 +49,7 @@ import org.slf4j.LoggerFactory;
* This tool is used to uniformly distribute the class of all the tuples of the dataset over a given number of
* partitions.
*/
-public class UDistrib {
+public final class UDistrib {
private static final Logger log = LoggerFactory.getLogger(UDistrib.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Thu May 27 18:02:20 2010
@@ -83,7 +83,7 @@ import org.slf4j.LoggerFactory;
* Then org.apache.mahout.utils.vectors.VectorDumper.main() will be called with arguments:
* <pre>{"--output", "/tmp/vectorOut", "-s", "/my/otherVector/sequenceFile"}</pre>
*/
-public class MahoutDriver {
+public final class MahoutDriver {
private static final Logger log = LoggerFactory.getLogger(MahoutDriver.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Thu May 27 18:02:20 2010
@@ -42,7 +42,7 @@ import org.uncommons.watchmaker.framewor
* job evaluates the fitness of each individual of the population using the given evaluator. Takes care of
* storing the population into an input file, and loading the fitness from job outputs.
*/
-public class MahoutEvaluator {
+public final class MahoutEvaluator {
private MahoutEvaluator() { }
/**
@@ -56,7 +56,8 @@ public class MahoutEvaluator {
* <code>List<Double></code> that contains the evaluated fitness for each candidate from the
* input population, sorted in the same order as the candidates.
*/
- public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations) throws IOException {
+ public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations)
+ throws IOException {
JobConf conf = new JobConf(MahoutEvaluator.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = prepareInput(fs, population);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java Thu May 27 18:02:20 2010
@@ -64,7 +64,6 @@ public class MatrixWritable implements W
}
int rowSize = in.readInt();
if (rowSize > 0) {
- rowLabelBindings = new HashMap<String, Integer>();
for (int i = 0; i < rowSize; i++) {
rowLabelBindings.put(in.readUTF(), in.readInt());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Thu May 27 18:02:20 2010
@@ -45,7 +45,7 @@ import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
-public class TimesSquaredJob {
+public final class TimesSquaredJob {
public static final String INPUT_VECTOR = "DistributedMatrix.times.inputVector";
public static final String IS_SPARSE_OUTPUT = "DistributedMatrix.times.outputVector.sparse";
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java Thu May 27 18:02:20 2010
@@ -29,7 +29,7 @@ class PartialOutputCollector implements
private final MapredOutput[] values;
- private int index = 0;
+ private int index;
PartialOutputCollector(int nbTrees) {
keys = new TreeID[nbTrees];
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommender.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommender.java Thu May 27 18:02:20 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -59,7 +59,7 @@ public final class GroupLensRecommender
@Override
public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer)
- throws TasteException {
+ throws TasteException {
return recommender.recommend(userID, howMany, rescorer);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Thu May 27 18:02:20 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -70,7 +70,7 @@ public final class TransposeToByUser {
while (lineIterator.hasNext()) {
line = lineIterator.next();
int firstComma = line.indexOf(',');
- String userIDString= line.substring(0, firstComma);
+ String userIDString = line.substring(0, firstComma);
int secondComma = line.indexOf(',', firstComma + 1);
String ratingString = line.substring(firstComma, secondComma); // keep comma
List<String> cachedLines = byUserEntryCache.get(userIDString);
@@ -86,7 +86,8 @@ public final class TransposeToByUser {
}
- private static void maybeFlushCache(File byUserDirectory, Map<String, List<String>> byUserEntryCache) throws IOException {
+ private static void maybeFlushCache(File byUserDirectory, Map<String, List<String>> byUserEntryCache)
+ throws IOException {
if (byUserEntryCache.size() >= 100000) {
log.info("Flushing cache");
for (Map.Entry<String, List<String>> entry : byUserEntryCache.entrySet()) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Thu May 27 18:02:20 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -134,7 +134,7 @@ public class TestForest extends Configur
outputPath = new Path(outputName);
}
} catch (OptionException e) {
- System.err.println("Exception : " + e);
+ System.out.println("Exception : " + e);
CommandLineUtil.printHelp(group);
return -1;
}
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java Thu May 27 18:02:20 2010
@@ -87,7 +87,7 @@ public class CDMapperTest extends Mahout
Set<LongWritable> keys = collector.getKeys();
assertEquals("Number of evaluations", rules.size(), keys.size());
- CDFitness[] expected = { TP, FP, TN, FN };
+ CDFitness[] expected = {TP, FP, TN, FN};
for (LongWritable key : keys) {
int index = (int) key.get();
assertEquals("Values for key " + key, 1, collector.getValue(key).size());
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java Thu May 27 18:02:20 2010
@@ -16,13 +16,15 @@ import org.apache.mahout.math.matrix.Dou
public class SingularValueDecomposition implements java.io.Serializable {
/** Arrays for internal storage of U and V. */
- private final double[][] U, V;
+ private final double[][] U;
+ private final double[][] V;
/** Array for internal storage of singular values. */
private final double[] s;
/** Row and column dimensions. */
- private final int m, n;
+ private final int m;
+ private final int n;
/**
* Constructs and returns a new singular value decomposition object; The decomposed matrices can be retrieved via
@@ -538,48 +540,42 @@ public class SingularValueDecomposition
String unknown = "Illegal operation or error: ";
try {
buf.append(String.valueOf(this.cond()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
buf.append("\nrank = ");
try {
buf.append(String.valueOf(this.rank()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
buf.append("\nnorm2 = ");
try {
buf.append(String.valueOf(this.norm2()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
buf.append("\n\nU = ");
try {
buf.append(String.valueOf(this.getU()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
buf.append("\n\nS = ");
try {
buf.append(String.valueOf(this.getS()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
buf.append("\n\nV = ");
try {
buf.append(String.valueOf(this.getV()));
- }
- catch (IllegalArgumentException exc) {
+ } catch (IllegalArgumentException exc) {
buf.append(unknown).append(exc.getMessage());
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java Thu May 27 18:02:20 2010
@@ -29,7 +29,7 @@ public class TestMatrixView extends Maho
private static final int COL = AbstractMatrix.COL;
private final double[][] values = {{0.0, 1.1, 2.2}, {1.1, 2.2, 3.3},
- {3.3, 4.4, 5.5}, {5.5, 6.6, 7.7}, {7.7, 8.8, 9.9}};
+ {3.3, 4.4, 5.5}, {5.5, 6.6, 7.7}, {7.7, 8.8, 9.9}};
private Matrix test;
Modified: mahout/trunk/maven/src/main/resources/mahout-checkstyle.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/maven/src/main/resources/mahout-checkstyle.xml?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/maven/src/main/resources/mahout-checkstyle.xml (original)
+++ mahout/trunk/maven/src/main/resources/mahout-checkstyle.xml Thu May 27 18:02:20 2010
@@ -47,7 +47,7 @@
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<!-- <module name="PackageHtml"/> -->
<!--module name="JavadocMethod"/-->
- <module name="JavadocType"/>
+ <!--module name="JavadocType"/-->
<!--module name="JavadocVariable"/-->
<!--module name="JavadocStyle"/-->
@@ -155,10 +155,12 @@
<module name="EmptyStatement" />
<module name="EqualsHashCode" />
<!--<module name="FinalLocalVariable"/>-->
+ <!--
<module name="HiddenField">
<property name="ignoreConstructorParameter" value="true" />
<property name="ignoreSetter" value="true" />
</module>
+ -->
<module name="IllegalInstantiation" />
<!--<module name="IllegalToken"/>-->
<!--<module name="IllegalTokenText"/>-->
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Thu May 27 18:02:20 2010
@@ -178,22 +178,22 @@ public class VectorBenchmarks implements
int[] indexes = randomVectorIndices.get(randomIndex);
double[] values = randomVectorValues.get(randomIndex);
List<Integer> randomOrder = new ArrayList<Integer>();
- for(int i=0; i<indexes.length; i++) {
+ for (int i = 0; i < indexes.length; i++) {
randomOrder.add(i);
}
Collections.shuffle(randomOrder);
int[] permutation = new int[randomOrder.size()];
- for(int i=0; i<randomOrder.size(); i++) {
+ for (int i = 0; i < randomOrder.size(); i++) {
permutation[i] = randomOrder.get(i);
}
TimingStatistics.Call call = stats.newCall();
- if(useSetQuick) {
- for(int i : permutation) {
+ if (useSetQuick) {
+ for (int i : permutation) {
v.setQuick(indexes[i], values[i]);
}
} else {
- for(int i : permutation) {
+ for (int i : permutation) {
v.set(indexes[i], values[i]);
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java Thu May 27 18:02:20 2010
@@ -53,7 +53,7 @@ import org.apache.mahout.math.VectorWrit
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class CDbwDriver {
+public final class CDbwDriver {
public static final String STATE_IN_KEY = "org.apache.mahout.clustering.dirichlet.stateIn";
@@ -148,7 +148,7 @@ public class CDbwDriver {
conf.set(STATE_IN_KEY, stateIn.toString());
conf.set(DISTANCE_MEASURE_KEY, distanceMeasureClass);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
- System.out.println("CDbw = " + evaluator.CDbw());
+ //System.out.println("CDbw = " + evaluator.CDbw());
}
private static void writeInitialState(Path output, Path clustersIn)
@@ -167,7 +167,7 @@ public class CDbwDriver {
while (reader.next(key, value)) {
Cluster cluster = (Cluster) value;
if (!(cluster instanceof DirichletCluster) || ((DirichletCluster) cluster).getTotalCount() > 0) {
- System.out.println("C-" + cluster.getId() + ": " + ClusterBase.formatVector(cluster.getCenter(), null));
+ //System.out.println("C-" + cluster.getId() + ": " + ClusterBase.formatVector(cluster.getCenter(), null));
writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Thu May 27 18:02:20 2010
@@ -17,9 +17,7 @@
package org.apache.mahout.clustering.cdbw;
-import java.io.File;
import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -58,7 +56,6 @@ public class CDbwEvaluator {
*/
public CDbwEvaluator(Map<Integer, List<VectorWritable>> representativePoints, Map<Integer, Cluster> clusters,
DistanceMeasure measure) {
- super();
this.representativePoints = representativePoints;
this.clusters = clusters;
this.measure = measure;
@@ -74,20 +71,10 @@ public class CDbwEvaluator {
* a JobConf with appropriate parameters
* @param clustersIn
* a String path to the input clusters directory
- *
- * @throws SecurityException
- * @throws IllegalArgumentException
- * @throws NoSuchMethodException
- * @throws InvocationTargetException
- * @throws ClassNotFoundException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws IOException
*/
public CDbwEvaluator(JobConf job, Path clustersIn)
- throws SecurityException, IllegalArgumentException, NoSuchMethodException,
- InvocationTargetException, ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
- super();
+ throws SecurityException, IllegalArgumentException,
+ ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
ClassLoader ccl = Thread.currentThread().getContextClassLoader();
Class<?> cl = ccl.loadClass(job.get(CDbwDriver.DISTANCE_MEASURE_KEY));
measure = (DistanceMeasure) cl.newInstance();
@@ -99,9 +86,7 @@ public class CDbwEvaluator {
}
public double CDbw() {
- double cdbw = intraClusterDensity() * separation();
- System.out.println("CDbw=" + cdbw);
- return cdbw;
+ return intraClusterDensity() * separation();
}
/**
@@ -110,18 +95,10 @@ public class CDbwEvaluator {
* @param clustersIn
* a String pathname to the directory containing input cluster files
* @return a List<Cluster> of the clusters
- *
- * @throws ClassNotFoundException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws IOException
- * @throws SecurityException
- * @throws NoSuchMethodException
- * @throws InvocationTargetException
*/
- private HashMap<Integer, Cluster> loadClusters(JobConf job, Path clustersIn)
+ private Map<Integer, Cluster> loadClusters(JobConf job, Path clustersIn)
throws InstantiationException, IllegalAccessException, IOException, SecurityException {
- HashMap<Integer, Cluster> clusters = new HashMap<Integer, Cluster>();
+ Map<Integer, Cluster> clusters = new HashMap<Integer, Cluster>();
FileSystem fs = clustersIn.getFileSystem(job);
for (FileStatus part : fs.listStatus(clustersIn)) {
if (!part.getPath().getName().startsWith(".")) {
@@ -151,10 +128,10 @@ public class CDbwEvaluator {
List<VectorWritable> repJ = representativePoints.get(cJ);
double minDistance = Double.MAX_VALUE;
Vector uIJ = null;
- for (int ptI = 0; ptI < repI.size(); ptI++) {
- for (int ptJ = 0; ptJ < repJ.size(); ptJ++) {
- Vector vI = repI.get(ptI).get();
- Vector vJ = repJ.get(ptJ).get();
+ for (VectorWritable aRepI : repI) {
+ for (VectorWritable aRepJ : repJ) {
+ Vector vI = aRepI.get();
+ Vector vJ = aRepJ.get();
double distance = measure.distance(vI, vJ);
if (distance < minDistance) {
minDistance = distance;
@@ -171,18 +148,19 @@ public class CDbwEvaluator {
density = minDistance * interDensity / stdSum;
}
- if (false) {
- System.out.println("minDistance[" + cI + "," + cJ + "]=" + minDistance);
- System.out.println("stDev[" + cI + "]=" + stDevI);
- System.out.println("stDev[" + cJ + "]=" + stDevJ);
- System.out.println("interDensity[" + cI + "," + cJ + "]=" + interDensity);
- System.out.println("density[" + cI + "," + cJ + "]=" + density);
- System.out.println();
- }
+ // Use a logger
+ //if (false) {
+ // System.out.println("minDistance[" + cI + "," + cJ + "]=" + minDistance);
+ // System.out.println("stDev[" + cI + "]=" + stDevI);
+ // System.out.println("stDev[" + cJ + "]=" + stDevJ);
+ // System.out.println("interDensity[" + cI + "," + cJ + "]=" + interDensity);
+ // System.out.println("density[" + cI + "," + cJ + "]=" + density);
+ // System.out.println();
+ //}
sum += density;
}
}
- System.out.println("interClusterDensity=" + sum);
+ //System.out.println("interClusterDensity=" + sum);
return sum;
}
@@ -206,10 +184,9 @@ public class CDbwEvaluator {
private void setStDev(int cI) {
List<VectorWritable> repPts = representativePoints.get(cI);
- double d = 0;
- if (repPts == null) {
- System.out.println();
- }
+ //if (repPts == null) {
+ // System.out.println();
+ //}
int s0 = 0;
Vector s1 = null;
Vector s2 = null;
@@ -228,8 +205,8 @@ public class CDbwEvaluator {
}
}
Vector std = s2.times(s0).minus(s1.times(s1)).assign(new SquareRootFunction()).divide(s0);
- d = std.zSum() / std.size();
- System.out.println("stDev[" + cI + "]=" + d);
+ double d = std.zSum() / std.size();
+ //System.out.println("stDev[" + cI + "]=" + d);
stDevs.put(cI, d);
}
@@ -255,9 +232,9 @@ public class CDbwEvaluator {
}
List<VectorWritable> repI = representativePoints.get(cI);
List<VectorWritable> repJ = representativePoints.get(cJ);
- for (int ptI = 0; ptI < repI.size(); ptI++) {
- for (int ptJ = 0; ptJ < repJ.size(); ptJ++) {
- double distance = measure.distance(repI.get(ptI).get(), repJ.get(ptJ).get());
+ for (VectorWritable aRepI : repI) {
+ for (VectorWritable aRepJ : repJ) {
+ double distance = measure.distance(aRepI.get(), aRepJ.get());
if (distance < minDistance) {
minDistance = distance;
}
@@ -266,7 +243,7 @@ public class CDbwEvaluator {
}
}
double separation = minDistance / (1 + interClusterDensity());
- System.out.println("separation=" + separation);
+ //System.out.println("separation=" + separation);
return separation;
}
@@ -275,7 +252,7 @@ public class CDbwEvaluator {
for (Integer cId : representativePoints.keySet()) {
avgStd += stDevs.get(cId);
}
- avgStd = avgStd / representativePoints.size();
+ avgStd /= representativePoints.size();
double sum = 0;
for (Integer cId : representativePoints.keySet()) {
@@ -291,7 +268,7 @@ public class CDbwEvaluator {
sum += cSum / repI.size();
}
double intraClusterDensity = sum / representativePoints.size();
- System.out.println("intraClusterDensity=" + intraClusterDensity);
+ //System.out.println("intraClusterDensity=" + intraClusterDensity);
return intraClusterDensity;
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java Thu May 27 18:02:20 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.clustering.cdbw;
import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -52,8 +51,10 @@ public class CDbwMapper extends MapReduc
private OutputCollector<IntWritable, WeightedVectorWritable> output = null;
@Override
- public void map(IntWritable clusterId, WeightedVectorWritable point, OutputCollector<IntWritable, WeightedVectorWritable> output,
- Reporter reporter) throws IOException {
+ public void map(IntWritable clusterId,
+ WeightedVectorWritable point,
+ OutputCollector<IntWritable, WeightedVectorWritable> output,
+ Reporter reporter) throws IOException {
this.output = output;
@@ -66,7 +67,8 @@ public class CDbwMapper extends MapReduc
totalDistance += measure.distance(refPoint.get(), point.getVector().get());
}
if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
- mostDistantPoints.put(key, new WeightedVectorWritable(totalDistance, new VectorWritable(point.getVector().get().clone())));
+ mostDistantPoints.put(key, new WeightedVectorWritable(totalDistance,
+ new VectorWritable(point.getVector().get().clone())));
}
}
@@ -75,8 +77,8 @@ public class CDbwMapper extends MapReduc
this.measure = measure;
}
- public static Map<Integer, List<VectorWritable>> getRepresentativePoints(JobConf job) throws SecurityException,
- IllegalArgumentException, NoSuchMethodException, InvocationTargetException {
+ public static Map<Integer, List<VectorWritable>> getRepresentativePoints(JobConf job)
+ throws SecurityException, IllegalArgumentException {
String statePath = job.get(CDbwDriver.STATE_IN_KEY);
Map<Integer, List<VectorWritable>> representativePoints = new HashMap<Integer, List<VectorWritable>>();
try {
@@ -121,10 +123,6 @@ public class CDbwMapper extends MapReduc
throw new IllegalStateException(e);
} catch (IllegalArgumentException e) {
throw new IllegalStateException(e);
- } catch (NoSuchMethodException e) {
- throw new IllegalStateException(e);
- } catch (InvocationTargetException e) {
- throw new IllegalStateException(e);
} catch (ClassNotFoundException e) {
throw new IllegalStateException(e);
} catch (InstantiationException e) {
@@ -134,9 +132,6 @@ public class CDbwMapper extends MapReduc
}
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.MapReduceBase#close()
- */
@Override
public void close() throws IOException {
for (Integer clusterId : mostDistantPoints.keySet()) {
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwReducer.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwReducer.java Thu May 27 18:02:20 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.clustering.cdbw;
import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -32,15 +31,18 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.math.VectorWritable;
-public class CDbwReducer extends MapReduceBase implements Reducer<IntWritable, WeightedVectorWritable, IntWritable, VectorWritable> {
+public class CDbwReducer extends MapReduceBase
+ implements Reducer<IntWritable, WeightedVectorWritable, IntWritable, VectorWritable> {
private Map<Integer, List<VectorWritable>> referencePoints;
private OutputCollector<IntWritable, VectorWritable> output;
@Override
- public void reduce(IntWritable key, Iterator<WeightedVectorWritable> values, OutputCollector<IntWritable, VectorWritable> output,
- Reporter reporter) throws IOException {
+ public void reduce(IntWritable key,
+ Iterator<WeightedVectorWritable> values,
+ OutputCollector<IntWritable, VectorWritable> output,
+ Reporter reporter) throws IOException {
this.output = output;
// find the most distant point
WeightedVectorWritable mdp = null;
@@ -81,10 +83,6 @@ public class CDbwReducer extends MapRedu
throw new IllegalStateException(e);
} catch (IllegalArgumentException e) {
throw new IllegalStateException(e);
- } catch (NoSuchMethodException e) {
- throw new IllegalStateException(e);
- } catch (InvocationTargetException e) {
- throw new IllegalStateException(e);
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Thu May 27 18:02:20 2010
@@ -49,7 +49,9 @@ import org.apache.mahout.utils.vectors.V
/**
* Class to print out the top K words for each topic.
*/
-public class LDAPrintTopics {
+public final class LDAPrintTopics {
+
+ private LDAPrintTopics() { }
private static class StringDoublePair implements Comparable<StringDoublePair> {
private final double score;
@@ -243,5 +245,4 @@ public class LDAPrintTopics {
}
}
- private LDAPrintTopics() { }
}