You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/24 21:05:55 UTC
svn commit: r1460431 [2/3] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/common/
core/src/main/java/org/apache/mahout/cf/taste/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java Sun Mar 24 20:05:50 2013
@@ -27,6 +27,8 @@ import org.apache.mahout.ep.State;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.stats.OnlineAuc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.DataInput;
import java.io.DataOutput;
@@ -36,7 +38,8 @@ import java.util.Locale;
import java.util.concurrent.ExecutionException;
/**
- * This is a meta-learner that maintains a pool of ordinary {@link org.apache.mahout.classifier.sgd.OnlineLogisticRegression} learners. Each
+ * This is a meta-learner that maintains a pool of ordinary
+ * {@link org.apache.mahout.classifier.sgd.OnlineLogisticRegression} learners. Each
* member of the pool has different learning rates. Whichever of the learners in the pool falls
* behind in terms of average log-likelihood will be tossed out and replaced with variants of the
* survivors. This will let us automatically derive an annealing schedule that optimizes learning
@@ -45,8 +48,9 @@ import java.util.concurrent.ExecutionExc
* learn also decreases the number of learning rate parameters required and replaces the normal
* hyper-parameter search.
* <p/>
- * One wrinkle is that the pool of learners that we maintain is actually a pool of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner}
- * which themselves contain several OnlineLogisticRegression objects. These pools allow estimation
+ * One wrinkle is that the pool of learners that we maintain is actually a pool of
+ * {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} which themselves contain several OnlineLogisticRegression
+ * objects. These pools allow estimation
* of performance on the fly even if we make many passes through the data. This does, however,
* increase the cost of training since if we are using 5-fold cross-validation, each vector is used
* 4 times for training and once for classification. If this becomes a problem, then we should
@@ -85,8 +89,9 @@ public class AdaptiveLogisticRegression
private boolean freezeSurvivors = true;
- public AdaptiveLogisticRegression() {
- }
+ private static final Logger log = LoggerFactory.getLogger(AdaptiveLogisticRegression.class);
+
+ public AdaptiveLogisticRegression() {}
/**
* Uses {@link #DEFAULT_THREAD_COUNT} and {@link #DEFAULT_POOL_SIZE}
@@ -108,7 +113,8 @@ public class AdaptiveLogisticRegression
* @param threadCount The number of threads to use for training
* @param poolSize The number of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} to use.
*/
- public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior, int threadCount, int poolSize) {
+ public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior, int threadCount,
+ int poolSize) {
this.numFeatures = numFeatures;
this.threadCount = threadCount;
this.poolSize = poolSize;
@@ -164,6 +170,7 @@ public class AdaptiveLogisticRegression
});
} catch (InterruptedException e) {
// ignore ... shouldn't happen
+ log.warn("Ignoring exception", e);
} catch (ExecutionException e) {
throw new IllegalStateException(e.getCause());
}
@@ -229,7 +236,7 @@ public class AdaptiveLogisticRegression
});
ep.close();
} catch (InterruptedException e) {
- // ignore
+ log.warn("Ignoring exception", e);
} catch (ExecutionException e) {
throw new IllegalStateException(e);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java Sun Mar 24 20:05:50 2013
@@ -304,7 +304,7 @@ public class CsvRecordFactory implements
* @return the raw target label
*/
public String getTargetLabel(int code) {
- for (String key: targetDictionary.values()) {
+ for (String key : targetDictionary.values()) {
if (targetDictionary.intern(key) == code) {
return key;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java Sun Mar 24 20:05:50 2013
@@ -20,4 +20,4 @@
* These classes currently implement a form of feature hashing with
* multiple probes to limit feature ambiguity.</p>
*/
-package org.apache.mahout.classifier.sgd;
\ No newline at end of file
+package org.apache.mahout.classifier.sgd;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,21 +17,21 @@
package org.apache.mahout.clustering.canopy;
-public interface CanopyConfigKeys {
+public final class CanopyConfigKeys {
- String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
+ private CanopyConfigKeys() {}
- String CANOPY_PATH_KEY = "org.apache.mahout.clustering.canopy.path";
+ public static final String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
- String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
+ public static final String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
- String T3_KEY = "org.apache.mahout.clustering.canopy.t3";
+ public static final String T3_KEY = "org.apache.mahout.clustering.canopy.t3";
- String T4_KEY = "org.apache.mahout.clustering.canopy.t4";
+ public static final String T4_KEY = "org.apache.mahout.clustering.canopy.t4";
// keys used by Driver, Mapper, Combiner & Reducer
- String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
+ public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
- String CF_KEY = "org.apache.mahout.clustering.canopy.canopyFilter";
+ public static final String CF_KEY = "org.apache.mahout.clustering.canopy.canopyFilter";
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Sun Mar 24 20:05:50 2013
@@ -279,7 +279,7 @@ public class CanopyDriver extends Abstra
clusterer.addPointToCanopies(vw.get(), canopies);
}
- Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0'+ Cluster.FINAL_ITERATION_SUFFIX);
+ Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
Path path = new Path(canopyOutputDir, "part-r-00000");
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
Text.class, ClusterWritable.class);
@@ -295,8 +295,8 @@ public class CanopyDriver extends Abstra
AbstractCluster.formatVector(canopy.getRadius(), null));
}
if (canopy.getNumObservations() > clusterFilter) {
- clusterWritable.setValue(canopy);
- writer.append(new Text(canopy.getIdentifier()), clusterWritable);
+ clusterWritable.setValue(canopy);
+ writer.append(new Text(canopy.getIdentifier()), clusterWritable);
}
}
} finally {
@@ -375,5 +375,5 @@ public class CanopyDriver extends Abstra
new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
clusterClassificationThreshold, true, runSequential);
}
-
+
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Sun Mar 24 20:05:50 2013
@@ -50,7 +50,7 @@ public class CanopyReducer extends Reduc
ClusterWritable clusterWritable = new ClusterWritable();
canopy.computeParameters();
if (canopy.getNumObservations() > clusterFilter) {
- clusterWritable.setValue(canopy);
+ clusterWritable.setValue(canopy);
context.write(new Text(canopy.getIdentifier()), clusterWritable);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Sun Mar 24 20:05:50 2013
@@ -228,8 +228,8 @@ public class DirichletDriver extends Abs
int numModels, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException,
InterruptedException, ClassNotFoundException {
ClusterClassifier.writePolicy(new DirichletClusteringPolicy(numModels, alpha0), stateIn);
- ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold,
- emitMostLikely, runSequential);
+ ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+ threshold, emitMostLikely, runSequential);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,18 +17,10 @@
package org.apache.mahout.clustering.fuzzykmeans;
-public interface FuzzyKMeansConfigKeys {
+public final class FuzzyKMeansConfigKeys {
- String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
+ private FuzzyKMeansConfigKeys() {}
- String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
-
- String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
-
- String M_KEY = "org.apache.mahout.clustering.fuzzykmeans.m";
-
- String EMIT_MOST_LIKELY_KEY = "org.apache.mahout.clustering.fuzzykmeans.emitMostLikely";
-
- String THRESHOLD_KEY = "org.apache.mahout.clustering.fuzzykmeans.threshold";
+ public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -268,7 +268,7 @@ public class FuzzyKMeansDriver extends A
List<Cluster> clusters = Lists.newArrayList();
FuzzyKMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
- if (conf==null) {
+ if (conf == null) {
conf = new Configuration();
}
@@ -320,7 +320,7 @@ public class FuzzyKMeansDriver extends A
throws IOException, ClassNotFoundException, InterruptedException {
ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn);
- ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely,
- runSequential);
+ ClusterClassificationDriver.run(input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+ threshold, emitMostLikely, runSequential);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java Sun Mar 24 20:05:50 2013
@@ -63,4 +63,4 @@ public interface ClusteringPolicy extend
*/
void close(ClusterClassifier posterior);
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -20,12 +20,11 @@ package org.apache.mahout.clustering.kme
/**
* This class holds all config keys that are relevant to be used in the KMeans MapReduce configuration.
* */
-public interface KMeansConfigKeys {
+public final class KMeansConfigKeys {
+
+ private KMeansConfigKeys() {}
+
/** Configuration key for distance measure to use. */
- String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
- /** Configuration key for convergence threshold. */
- String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
- /** Configuration key for iteration cluster path */
- String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
+ public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -142,7 +142,8 @@ public class KMeansDriver extends Abstra
if (log.isInfoEnabled()) {
log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output,
measure.getClass().getName());
- log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", convergenceDelta, maxIterations, VectorWritable.class.getName());
+ log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", convergenceDelta,
+ maxIterations, VectorWritable.class.getName());
}
Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
if (runClustering) {
@@ -259,4 +260,4 @@ public class KMeansDriver extends Abstra
clusterClassificationThreshold, true, runSequential);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java Sun Mar 24 20:05:50 2013
@@ -2,4 +2,4 @@
* This package provides an implementation of the <a href="http://en.wikipedia.org/wiki/Kmeans">k-means</a> clustering
* algorithm.
*/
-package org.apache.mahout.clustering.kmeans;
\ No newline at end of file
+package org.apache.mahout.clustering.kmeans;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java Sun Mar 24 20:05:50 2013
@@ -31,7 +31,7 @@ public class CVB0DocInferenceMapper exte
public void map(IntWritable docId, VectorWritable doc, Context context)
throws IOException, InterruptedException {
int numTopics = getNumTopics();
- Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 /numTopics);
+ Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 / numTopics);
Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size());
int maxIters = getMaxIters();
ModelTrainer modelTrainer = getModelTrainer();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Sun Mar 24 20:05:50 2013
@@ -273,7 +273,8 @@ public class CVB0Driver extends Abstract
}
log.info("Backfilling perplexity at iteration {}", i);
if (!fs.exists(modelPath)) {
- log.error("Model path '{}' does not exist; Skipping iteration {} perplexity calculation", modelPath.toString(), i);
+ log.error("Model path '{}' does not exist; Skipping iteration {} perplexity calculation",
+ modelPath.toString(), i);
continue;
}
perplexity = calculatePerplexity(conf, inputPath, modelPath, i);
@@ -308,7 +309,8 @@ public class CVB0Driver extends Abstract
if (testFraction > 0 && iterationNumber % iterationBlockSize == 0) {
perplexities.add(calculatePerplexity(conf, inputPath, modelOutputPath, iterationNumber));
log.info("Current perplexity = {}", perplexities.get(perplexities.size() - 1));
- log.info("(p_{} - p_{}) / p_0 = {}; target = {}", iterationNumber, iterationNumber - iterationBlockSize, rateOfChange(perplexities), convergenceDelta);
+ log.info("(p_{} - p_{}) / p_0 = {}; target = {}", iterationNumber, iterationNumber - iterationBlockSize,
+ rateOfChange(perplexities), convergenceDelta);
}
}
log.info("Completed {} iterations in {} seconds", iterationNumber,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java Sun Mar 24 20:05:50 2013
@@ -110,9 +110,9 @@ public class CachingCVB0Mapper
@Override
public void map(IntWritable docId, VectorWritable document, Context context)
- throws IOException, InterruptedException{
+ throws IOException, InterruptedException {
/* where to get docTopics? */
- Vector topicVector = new DenseVector(new double[numTopics]).assign(1.0/numTopics);
+ Vector topicVector = new DenseVector(new double[numTopics]).assign(1.0 / numTopics);
modelTrainer.train(document.get(), topicVector, true, maxIters);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java Sun Mar 24 20:05:50 2013
@@ -95,7 +95,7 @@ public class CachingCVB0PerplexityMapper
@Override
public void map(IntWritable docId, VectorWritable document, Context context)
- throws IOException, InterruptedException{
+ throws IOException, InterruptedException {
if (testFraction < 1.0f && random.nextFloat() >= testFraction) {
return;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Sun Mar 24 20:05:50 2013
@@ -121,7 +121,7 @@ public class InMemoryCollapsedVariationa
numTerms = terms != null ? terms.length : corpus.numCols();
Map<String, Integer> termIdMap = Maps.newHashMap();
if (terms != null) {
- for (int t=0; t<terms.length; t++) {
+ for (int t = 0; t < terms.length; t++) {
termIdMap.put(terms[t], t);
}
}
@@ -134,7 +134,7 @@ public class InMemoryCollapsedVariationa
private void postInitCorpus() {
totalCorpusWeight = 0;
int numNonZero = 0;
- for (int i=0; i<numDocuments; i++) {
+ for (int i = 0; i < numDocuments; i++) {
Vector v = corpusWeights.viewRow(i);
double norm;
if (v != null && (norm = v.norm(1)) != 0) {
@@ -148,8 +148,7 @@ public class InMemoryCollapsedVariationa
private void initializeModel() {
TopicModel topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
- numUpdatingThreads,
- initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
+ numUpdatingThreads, initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
topicModel.setConf(getConf());
TopicModel updatedModel = initialModelCorpusFraction == 0
@@ -157,7 +156,7 @@ public class InMemoryCollapsedVariationa
: topicModel;
updatedModel.setConf(getConf());
docTopicCounts = new DenseMatrix(numDocuments, numTopics);
- docTopicCounts.assign(1.0/numTopics);
+ docTopicCounts.assign(1.0 / numTopics);
modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
}
@@ -179,8 +178,8 @@ public class InMemoryCollapsedVariationa
long start = System.nanoTime();
modelTrainer.start();
for (int docId = 0; docId < corpusWeights.numRows(); docId++) {
- if (testFraction == 0 || docId % (1/testFraction) != 0) {
- Vector docTopics = new DenseVector(numTopics).assign(1.0/numTopics); // docTopicCounts.getRow(docId)
+ if (testFraction == 0 || docId % (1 / testFraction) != 0) {
+ Vector docTopics = new DenseVector(numTopics).assign(1.0 / numTopics); // docTopicCounts.getRow(docId)
modelTrainer.trainSync(corpusWeights.viewRow(docId), docTopics , true, 10);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java Sun Mar 24 20:05:50 2013
@@ -124,7 +124,7 @@ public class ModelTrainer {
int docId = docSlice.index();
Vector document = docSlice.vector();
Vector topicDist = topicSlice.vector();
- if (testFraction == 0 || docId % (1/testFraction) == 0) {
+ if (testFraction == 0 || docId % (1 / testFraction) == 0) {
trainSync(document, topicDist, false, 10);
perplexity += readModel.perplexity(document, topicDist);
matrixNorm += document.norm(1);
@@ -166,7 +166,7 @@ public class ModelTrainer {
train(document, topicDist, true, numDocTopicIters);
if (log.isDebugEnabled()) {
times[i % times.length] =
- (System.nanoTime() - start) /(1.0e6 * document.getNumNondefaultElements());
+ (System.nanoTime() - start) / (1.0e6 * document.getNumNondefaultElements());
if (i % 100 == 0) {
long time = System.nanoTime() - startTime;
log.debug("trained {} documents in {}ms", i, time / 1.0e6);
@@ -257,7 +257,7 @@ public class ModelTrainer {
readModel.persist(outputPath, true);
}
- private static class TrainerRunnable implements Runnable, Callable<Double> {
+ private static final class TrainerRunnable implements Runnable, Callable<Double> {
private final TopicModel readModel;
private final TopicModel writeModel;
private final Vector document;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Sun Mar 24 20:05:50 2013
@@ -282,7 +282,7 @@ public class TopicModel implements Confi
topics.set(x, docTopicModel.viewRow(x).norm(1));
}
// now renormalize so that sum_x(p(x|doc)) = 1
- topics.assign(Functions.mult(1/topics.norm(1)));
+ topics.assign(Functions.mult(1 / topics.norm(1)));
}
public Vector infer(Vector original, Vector docTopics) {
@@ -357,7 +357,8 @@ public class TopicModel implements Confi
int termIndex = e.index();
// calc un-normalized p(topic x | term a, document i)
- double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha) / (topicSum + eta * numTerms);
+ double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha) /
+ (topicSum + eta * numTerms);
termTopicRow.set(termIndex, termTopicLikelihood);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterMapper.java Sun Mar 24 20:05:50 2013
@@ -42,7 +42,7 @@ public class MeanShiftCanopyClusterMappe
protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
throws IOException, InterruptedException {
// canopies use canopyIds assigned when input vectors are processed as vectorIds too
- MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+ MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
int vectorId = canopy.getId();
for (MeanShiftCanopy msc : canopies) {
for (int containedId : msc.getBoundPoints().toList()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Sun Mar 24 20:05:50 2013
@@ -17,15 +17,17 @@
package org.apache.mahout.clustering.meanshift;
-public interface MeanShiftCanopyConfigKeys {
+public final class MeanShiftCanopyConfigKeys {
- // keys used by Driver, Mapper, Combiner & Reducer
- String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
- String KERNEL_PROFILE_KEY = "org.apache.mahout.clustering.canopy.kernelprofile";
- String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
- String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
- String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
- String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
- String CLUSTER_POINTS_KEY = "org.apache.mahout.clustering.meanshift.clusterPointsKey";
+ private MeanShiftCanopyConfigKeys() {}
+
+ // keys used by Driver, Mapper, Combiner & Reducer
+ public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
+ public static final String KERNEL_PROFILE_KEY = "org.apache.mahout.clustering.canopy.kernelprofile";
+ public static final String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
+ public static final String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
+ public static final String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
+ public static final String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
+ public static final String CLUSTER_POINTS_KEY = "org.apache.mahout.clustering.meanshift.clusterPointsKey";
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sun Mar 24 20:05:50 2013
@@ -215,10 +215,10 @@ public class MeanShiftCanopyDriver exten
for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(
s.getPath(), conf)) {
MeanShiftCanopy initialCanopy = MeanShiftCanopy.initialCanopy(value.get(),
- id++, measure);
+ id++, measure);
ClusterWritable clusterWritable = new ClusterWritable();
clusterWritable.setValue(initialCanopy);
- writer.append(new Text(), clusterWritable);
+ writer.append(new Text(), clusterWritable);
}
} finally {
Closeables.closeQuietly(writer);
@@ -308,8 +308,8 @@ public class MeanShiftCanopyDriver exten
FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(
clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
- MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
- clusterer.mergeCanopy(canopy, clusters);
+ MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+ clusterer.mergeCanopy(canopy, clusters);
}
boolean[] converged = { false };
int iteration = 1;
@@ -340,8 +340,8 @@ public class MeanShiftCanopyDriver exten
clustersIn = clustersOut;
iteration++;
}
- Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1));
- Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + "-final");
+ Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
+ Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + "-final");
FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn);
return finalClustersIn;
}
@@ -379,8 +379,8 @@ public class MeanShiftCanopyDriver exten
conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers));
}
}
- Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1));
- Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+ Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
+ Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn);
return finalClustersIn;
}
@@ -476,7 +476,7 @@ public class MeanShiftCanopyDriver exten
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(
clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
MeanShiftCanopy cluster = (MeanShiftCanopy) clusterWritable.getValue();
- clusters.add(cluster);
+ clusters.add(cluster);
}
// iterate over all points, assigning each to the closest canopy and
// outputting that clustering
@@ -491,7 +491,7 @@ public class MeanShiftCanopyDriver exten
for (Pair<Writable, ClusterWritable> record : new SequenceFileIterable<Writable, ClusterWritable>(
s.getPath(), conf)) {
ClusterWritable clusterWritable = record.getSecond();
- MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
+ MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
MeanShiftCanopy closest = MeanShiftCanopyClusterer
.findCoveringCanopy(canopy, clusters);
writer.append(new IntWritable(closest.getId()),
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Sun Mar 24 20:05:50 2013
@@ -30,7 +30,7 @@ import org.apache.mahout.clustering.iter
import com.google.common.collect.Lists;
public class MeanShiftCanopyReducer extends Reducer<Text,ClusterWritable,Text,ClusterWritable> {
-
+
private final Collection<MeanShiftCanopy> canopies = Lists.newArrayList();
private MeanShiftCanopyClusterer clusterer;
private boolean allConverged = true;
@@ -45,10 +45,10 @@ public class MeanShiftCanopyReducer exte
protected void reduce(Text key, Iterable<ClusterWritable> values, Context context)
throws IOException, InterruptedException {
for (ClusterWritable clusterWritable : values) {
- MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
- clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
+ MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
+ clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
}
-
+
for (MeanShiftCanopy canopy : canopies) {
boolean converged = clusterer.shiftToMean(canopy);
if (converged) {
@@ -59,7 +59,7 @@ public class MeanShiftCanopyReducer exte
clusterWritable.setValue(canopy);
context.write(new Text(canopy.getIdentifier()), clusterWritable);
}
-
+
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java Sun Mar 24 20:05:50 2013
@@ -10,4 +10,4 @@
*
* <p>Output of each clustering algorithm is either a hard or soft assignment of items to clusters.</p>
*/
-package org.apache.mahout.clustering;
\ No newline at end of file
+package org.apache.mahout.clustering;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Sun Mar 24 20:05:50 2013
@@ -135,7 +135,7 @@ public class EigencutsDriver extends Abs
DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
U.setConf(new Configuration(conf));
List<Double> eigenValues = Lists.newArrayList();
- for (int i=0; i<eigenrank; i++) {
+ for (int i = 0; i < eigenrank; i++) {
eigenValues.set(i, state.getSingularValue(i));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsKeys.java Sun Mar 24 20:05:50 2013
@@ -20,65 +20,67 @@ package org.apache.mahout.clustering.spe
/**
* Configuration keys for the Eigencuts algorithm (analogous to KMeansConfigKeys)
*/
-public interface EigencutsKeys {
+public final class EigencutsKeys {
+
+ private EigencutsKeys() {}
/**
* B_0, or the user-specified minimum eigenflow half-life threshold
* for an eigenvector/eigenvalue pair to be considered. Increasing
* B_0 equates to fewer clusters
*/
- String BETA = "org.apache.mahout.clustering.spectral.beta";
+ public static final String BETA = "org.apache.mahout.clustering.spectral.beta";
/**
* Tau, or the user-specified threshold for making cuts (setting edge
* affinities to 0) after performing non-maximal suppression on edge weight
* sensitivities. Increasing tau equates to more edge cuts
*/
- String TAU = "org.apache.mahout.clustering.spectral.tau";
+ public static final String TAU = "org.apache.mahout.clustering.spectral.tau";
/**
* The normalization factor for computing the cut threshold
*/
- String DELTA = "org.apache.mahout.clustering.spectral.delta";
+ public static final String DELTA = "org.apache.mahout.clustering.spectral.delta";
/**
* Epsilon, or the user-specified coefficient that works in tandem with
* MINIMUM_HALF_LIFE to determine which eigenvector/eigenvalue pairs to use.
* Increasing epsilon equates to fewer eigenvector/eigenvalue pairs
*/
- String EPSILON = "org.apache.mahout.clustering.spectral.epsilon";
+ public static final String EPSILON = "org.apache.mahout.clustering.spectral.epsilon";
/**
* Base path to the location on HDFS where the diagonal matrix (a vector)
* and the list of eigenvalues will be stored for one of the map/reduce
* jobs in Eigencuts.
*/
- String VECTOR_CACHE_BASE = "org.apache.mahout.clustering.spectral.eigencuts.vectorcache";
+ public static final String VECTOR_CACHE_BASE = "org.apache.mahout.clustering.spectral.eigencuts.vectorcache";
/**
* Refers to the dimensions of the raw affinity matrix input. Since this
* matrix is symmetrical, it is a square matrix, hence all its dimensions
* are equal.
*/
- String AFFINITY_DIMENSIONS = "org.apache.mahout.clustering.spectral.eigencuts.affinitydimensions";
+ public static final String AFFINITY_DIMENSIONS = "org.apache.mahout.clustering.spectral.eigencuts.affinitydimensions";
/**
* Refers to the Path to the SequenceFile representing the affinity matrix
*/
- String AFFINITY_PATH = "org.apache.mahout.clustering.spectral.eigencuts.affinitypath";
+ public static final String AFFINITY_PATH = "org.apache.mahout.clustering.spectral.eigencuts.affinitypath";
/**
* Refers to the Path to the SequenceFile representing the cut matrix
*/
- String CUTMATRIX_PATH = "org.apache.mahout.clustering.spectral.eigencuts.cutmatrixpath";
+ public static final String CUTMATRIX_PATH = "org.apache.mahout.clustering.spectral.eigencuts.cutmatrixpath";
/**
* Sets the SequenceFile index for the list of eigenvalues.
*/
- int EIGENVALUES_CACHE_INDEX = 0;
+ public static final int EIGENVALUES_CACHE_INDEX = 0;
/**
* Sets the SequenceFile index for the diagonal matrix.
*/
- int DIAGONAL_CACHE_INDEX = 1;
+ public static final int DIAGONAL_CACHE_INDEX = 1;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Sun Mar 24 20:05:50 2013
@@ -50,83 +50,83 @@ import org.apache.mahout.math.hadoop.sto
*/
public class SpectralKMeansDriver extends AbstractJob {
- public static final double OVERSHOOTMULTIPLIER = 2.0;
- public static final int REDUCERS = 10;
- public static final int BLOCKHEIGHT = 30000;
- public static final int OVERSAMPLING = 15;
- public static final int POWERITERS = 0;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new SpectralKMeansDriver(), args);
- }
-
- @Override
- public int run(String[] arg0)
- throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
-
- Configuration conf = getConf();
- addInputOption();
- addOutputOption();
- addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
- addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
- addOption(DefaultOptionCreator.distanceMeasureOption().create());
- addOption(DefaultOptionCreator.convergenceOption().create());
- addOption(DefaultOptionCreator.maxIterationsOption().create());
- addOption(DefaultOptionCreator.overwriteOption().create());
- addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
- addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
- addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
- addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
- addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
-
- Map<String, List<String>> parsedArgs = parseArguments(arg0);
- if (parsedArgs == null) {
- return 0;
- }
-
- Path input = getInputPath();
- Path output = getOutputPath();
- if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
- HadoopUtil.delete(conf, output);
- }
- int numDims = Integer.parseInt(getOption("dimensions"));
- int clusters = Integer.parseInt(getOption("clusters"));
- String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
- DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
- double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
- int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
-
- Path tempdir = new Path(getOption("tempDir"));
- boolean ssvd = parsedArgs.containsKey("--usessvd");
- if (ssvd) {
- int reducers = Integer.parseInt(getOption("reduceTasks"));
- int blockheight = Integer.parseInt(getOption("outerProdBlockHeight"));
- int oversampling = Integer.parseInt(getOption("oversampling"));
- int poweriters = Integer.parseInt(getOption("powerIter"));
- run(conf, input, output, numDims, clusters, measure, convergenceDelta,
- maxIterations, tempdir, true, reducers, blockheight, oversampling, poweriters);
- } else {
- run(conf, input, output, numDims, clusters, measure, convergenceDelta,
- maxIterations, tempdir, false);
- }
-
- return 0;
- }
-
- public static void run(
- Configuration conf,
- Path input,
- Path output,
- int numDims,
- int clusters,
- DistanceMeasure measure,
- double convergenceDelta,
- int maxIterations,
- Path tempDir,
- boolean ssvd) throws IOException, InterruptedException, ClassNotFoundException {
- run(conf, input, output, numDims, clusters, measure, convergenceDelta,
- maxIterations, tempDir, ssvd, REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
- }
+ public static final double OVERSHOOTMULTIPLIER = 2.0;
+ public static final int REDUCERS = 10;
+ public static final int BLOCKHEIGHT = 30000;
+ public static final int OVERSAMPLING = 15;
+ public static final int POWERITERS = 0;
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new SpectralKMeansDriver(), args);
+ }
+
+ @Override
+ public int run(String[] arg0)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
+
+ Configuration conf = getConf();
+ addInputOption();
+ addOutputOption();
+ addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
+ addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
+ addOption(DefaultOptionCreator.distanceMeasureOption().create());
+ addOption(DefaultOptionCreator.convergenceOption().create());
+ addOption(DefaultOptionCreator.maxIterationsOption().create());
+ addOption(DefaultOptionCreator.overwriteOption().create());
+ addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
+ addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
+ addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
+ addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
+ addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
+
+ Map<String, List<String>> parsedArgs = parseArguments(arg0);
+ if (parsedArgs == null) {
+ return 0;
+ }
+
+ Path input = getInputPath();
+ Path output = getOutputPath();
+ if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
+ HadoopUtil.delete(conf, output);
+ }
+ int numDims = Integer.parseInt(getOption("dimensions"));
+ int clusters = Integer.parseInt(getOption("clusters"));
+ String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
+ DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
+ double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
+ int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
+
+ Path tempdir = new Path(getOption("tempDir"));
+ boolean ssvd = parsedArgs.containsKey("--usessvd");
+ if (ssvd) {
+ int reducers = Integer.parseInt(getOption("reduceTasks"));
+ int blockheight = Integer.parseInt(getOption("outerProdBlockHeight"));
+ int oversampling = Integer.parseInt(getOption("oversampling"));
+ int poweriters = Integer.parseInt(getOption("powerIter"));
+ run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+ maxIterations, tempdir, true, reducers, blockheight, oversampling, poweriters);
+ } else {
+ run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+ maxIterations, tempdir, false);
+ }
+
+ return 0;
+ }
+
+ public static void run(
+ Configuration conf,
+ Path input,
+ Path output,
+ int numDims,
+ int clusters,
+ DistanceMeasure measure,
+ double convergenceDelta,
+ int maxIterations,
+ Path tempDir,
+ boolean ssvd) throws IOException, InterruptedException, ClassNotFoundException {
+ run(conf, input, output, numDims, clusters, measure, convergenceDelta,
+ maxIterations, tempDir, ssvd, REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
+ }
/**
* Run the Spectral KMeans clustering on the supplied arguments
@@ -146,125 +146,125 @@ public class SpectralKMeansDriver extend
* @param oversampling
* @param poweriters
*/
- public static void run(
- Configuration conf,
- Path input,
- Path output,
- int numDims,
- int clusters,
- DistanceMeasure measure,
- double convergenceDelta,
- int maxIterations,
- Path tempDir,
- boolean ssvd,
- int numReducers,
- int blockHeight,
- int oversampling,
- int poweriters)
- throws IOException, InterruptedException, ClassNotFoundException {
-
- Path outputCalc = new Path(tempDir, "calculations");
- Path outputTmp = new Path(tempDir, "temporary");
-
- // Take in the raw CSV text file and split it ourselves,
- // creating our own SequenceFiles for the matrices to read later
- // (similar to the style of syntheticcontrol.canopy.InputMapper)
- Path affSeqFiles = new Path(outputCalc, "seqfile");
- AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);
-
- // Construct the affinity matrix using the newly-created sequence files
- DistributedRowMatrix A =
- new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims);
-
- Configuration depConf = new Configuration(conf);
- A.setConf(depConf);
-
- // Construct the diagonal matrix D (represented as a vector)
- Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
-
- //Calculate the normalized Laplacian of the form: L = D^(-0.5)AD^(-0.5)
- DistributedRowMatrix L = VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
- new Path(outputCalc, "laplacian"), new Path(outputCalc, outputCalc));
- L.setConf(depConf);
-
- Path data;
-
- if (ssvd) {
- // SSVD requires an array of Paths to function. So we pass in an array of length one
- Path [] LPath = new Path[1];
- LPath[0] = L.getRowPath();
-
- Path SSVDout = new Path(outputCalc, "SSVD");
-
- SSVDSolver solveIt = new SSVDSolver(
- depConf,
- LPath,
- SSVDout,
- blockHeight,
- clusters,
- oversampling,
- numReducers);
-
- solveIt.setComputeV(false);
- solveIt.setComputeU(true);
- solveIt.setOverwrite(true);
- solveIt.setQ(poweriters);
- //solveIt.setBroadcast(false);
- solveIt.run();
- data = new Path(solveIt.getUPath());
- } else {
- // Perform eigen-decomposition using LanczosSolver
- // since some of the eigen-output is spurious and will be eliminated
- // upon verification, we have to aim to overshoot and then discard
- // unnecessary vectors later
- int overshoot = Math.min((int) ((double) clusters * OVERSHOOTMULTIPLIER), numDims);
- DistributedLanczosSolver solver = new DistributedLanczosSolver();
- LanczosState state = new LanczosState(L, overshoot, solver.getInitialVector(L));
- Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");
-
- solver.runJob(conf,
- state,
- overshoot,
- true,
- lanczosSeqFiles.toString());
-
- // perform a verification
- EigenVerificationJob verifier = new EigenVerificationJob();
- Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
- verifier.runJob(conf,
- lanczosSeqFiles,
- L.getRowPath(),
- verifiedEigensPath,
- true,
- 1.0,
- clusters);
-
- Path cleanedEigens = verifier.getCleanedEigensPath();
- DistributedRowMatrix W = new DistributedRowMatrix(
- cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
- W.setConf(depConf);
- DistributedRowMatrix Wtrans = W.transpose();
- data = Wtrans.getRowPath();
- }
-
- // Normalize the rows of Wt to unit length
- // normalize is important because it reduces the occurrence of two unique clusters combining into one
- Path unitVectors = new Path(outputCalc, "unitvectors");
-
- UnitVectorizerJob.runJob(data, unitVectors);
-
- DistributedRowMatrix Wt = new DistributedRowMatrix(
- unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
- Wt.setConf(depConf);
- data = Wt.getRowPath();
-
- // Generate random initial clusters
- Path initialclusters = RandomSeedGenerator.buildRandom(conf, data,
- new Path(output, Cluster.INITIAL_CLUSTERS_DIR), clusters, measure);
-
- // Run the KMeansDriver
- Path answer = new Path(output, "kmeans_out");
- KMeansDriver.run(conf, data, initialclusters, answer,
- measure,convergenceDelta, maxIterations, true, 0.0, false);
+ public static void run(
+ Configuration conf,
+ Path input,
+ Path output,
+ int numDims,
+ int clusters,
+ DistanceMeasure measure,
+ double convergenceDelta,
+ int maxIterations,
+ Path tempDir,
+ boolean ssvd,
+ int numReducers,
+ int blockHeight,
+ int oversampling,
+ int poweriters)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
+ Path outputCalc = new Path(tempDir, "calculations");
+ Path outputTmp = new Path(tempDir, "temporary");
+
+ // Take in the raw CSV text file and split it ourselves,
+ // creating our own SequenceFiles for the matrices to read later
+ // (similar to the style of syntheticcontrol.canopy.InputMapper)
+ Path affSeqFiles = new Path(outputCalc, "seqfile");
+ AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);
+
+ // Construct the affinity matrix using the newly-created sequence files
+ DistributedRowMatrix A =
+ new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims);
+
+ Configuration depConf = new Configuration(conf);
+ A.setConf(depConf);
+
+ // Construct the diagonal matrix D (represented as a vector)
+ Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
+
+ //Calculate the normalized Laplacian of the form: L = D^(-0.5)AD^(-0.5)
+ DistributedRowMatrix L = VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
+ new Path(outputCalc, "laplacian"), new Path(outputCalc, outputCalc));
+ L.setConf(depConf);
+
+ Path data;
+
+ if (ssvd) {
+ // SSVD requires an array of Paths to function. So we pass in an array of length one
+ Path [] LPath = new Path[1];
+ LPath[0] = L.getRowPath();
+
+ Path SSVDout = new Path(outputCalc, "SSVD");
+
+ SSVDSolver solveIt = new SSVDSolver(
+ depConf,
+ LPath,
+ SSVDout,
+ blockHeight,
+ clusters,
+ oversampling,
+ numReducers);
+
+ solveIt.setComputeV(false);
+ solveIt.setComputeU(true);
+ solveIt.setOverwrite(true);
+ solveIt.setQ(poweriters);
+ //solveIt.setBroadcast(false);
+ solveIt.run();
+ data = new Path(solveIt.getUPath());
+ } else {
+ // Perform eigen-decomposition using LanczosSolver
+ // since some of the eigen-output is spurious and will be eliminated
+ // upon verification, we have to aim to overshoot and then discard
+ // unnecessary vectors later
+ int overshoot = Math.min((int) ((double) clusters * OVERSHOOTMULTIPLIER), numDims);
+ DistributedLanczosSolver solver = new DistributedLanczosSolver();
+ LanczosState state = new LanczosState(L, overshoot, solver.getInitialVector(L));
+ Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");
+
+ solver.runJob(conf,
+ state,
+ overshoot,
+ true,
+ lanczosSeqFiles.toString());
+
+ // perform a verification
+ EigenVerificationJob verifier = new EigenVerificationJob();
+ Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
+ verifier.runJob(conf,
+ lanczosSeqFiles,
+ L.getRowPath(),
+ verifiedEigensPath,
+ true,
+ 1.0,
+ clusters);
+
+ Path cleanedEigens = verifier.getCleanedEigensPath();
+ DistributedRowMatrix W = new DistributedRowMatrix(
+ cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
+ W.setConf(depConf);
+ DistributedRowMatrix Wtrans = W.transpose();
+ data = Wtrans.getRowPath();
+ }
+
+ // Normalize the rows of Wt to unit length
+ // normalize is important because it reduces the occurrence of two unique clusters combining into one
+ Path unitVectors = new Path(outputCalc, "unitvectors");
+
+ UnitVectorizerJob.runJob(data, unitVectors);
+
+ DistributedRowMatrix Wt = new DistributedRowMatrix(
+ unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
+ Wt.setConf(depConf);
+ data = Wt.getRowPath();
+
+ // Generate random initial clusters
+ Path initialclusters = RandomSeedGenerator.buildRandom(conf, data,
+ new Path(output, Cluster.INITIAL_CLUSTERS_DIR), clusters, measure);
+
+ // Run the KMeansDriver
+ Path answer = new Path(output, "kmeans_out");
+ KMeansDriver.run(conf, data, initialclusters, answer,
+ measure,convergenceDelta, maxIterations, true, 0.0, false);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/PathDirectory.java Sun Mar 24 20:05:50 2013
@@ -91,4 +91,4 @@ public final class PathDirectory {
return new Path(clusterPostProcessorOutput + File.separator + clusterId);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java Sun Mar 24 20:05:50 2013
@@ -63,4 +63,4 @@ public final class ClusterCountReader {
return numberOfClusters;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Sun Mar 24 20:05:50 2013
@@ -104,7 +104,7 @@ public final class ClusterOutputPostProc
private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
Writer writer = findWriterForVector(clusterId);
postProcessedClusterDirectories.put(clusterId,
- PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));
+ PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));
writeVectorToCluster(writer, point);
}
@@ -141,4 +141,4 @@ public final class ClusterOutputPostProc
this.clusteredPoints = clusteredPoints;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java Sun Mar 24 20:05:50 2013
@@ -42,7 +42,7 @@ import org.apache.mahout.math.VectorWrit
* used for top down clustering. It can also be used if the clustering output needs to be grouped into their
* respective clusters.
*/
-public class ClusterOutputPostProcessorDriver extends AbstractJob {
+public final class ClusterOutputPostProcessorDriver extends AbstractJob {
/**
* CLI to run clustering post processor. The input to post processor is the output path specified to the
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Sun Mar 24 20:05:50 2013
@@ -316,8 +316,10 @@ public abstract class AbstractJob extend
/**
*
* @param args The args to parse
- * @param inputOptional if false, then the input option, if set, need not be present. If true and input is an option and there is no input, then throw an error
- * @param outputOptional if false, then the output option, if set, need not be present. If true and output is an option and there is no output, then throw an error
+ * @param inputOptional if false, then the input option, if set, need not be present. If true and input is an option
+ * and there is no input, then throw an error
+ * @param outputOptional if false, then the output option, if set, need not be present. If true and output is an
+ * option and there is no output, then throw an error
* @return the args parsed into a map.
*/
public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Sun Mar 24 20:05:50 2013
@@ -98,8 +98,10 @@ public final class HadoopUtil {
* @param outputPath The output {@link org.apache.hadoop.fs.Path}
* @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
* @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
- * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, this value may be null
- * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, this value may be null
+ * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op,
+ * this value may be null
+ * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op,
+ * this value may be null
* @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
* @param reducerKey The reducer key class.
* @param reducerValue The reducer value class.
@@ -109,7 +111,8 @@ public final class HadoopUtil {
* @throws IOException if there is a problem with the IO.
*
* @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
- * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, org.apache.hadoop.conf.Configuration)
+ * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
+ * org.apache.hadoop.conf.Configuration)
*/
public static Job prepareJob(Path inputPath,
Path outputPath,
@@ -203,7 +206,9 @@ public final class HadoopUtil {
}
/**
- * Count all the records in a directory using a {@link org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator}
+ * Count all the records in a directory using a
+ * {@link org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator}
+ *
* @param path The {@link org.apache.hadoop.fs.Path} to count
* @param pt The {@link org.apache.mahout.common.iterator.sequencefile.PathType}
* @param filter Apply the {@link org.apache.hadoop.fs.PathFilter}. May be null
@@ -226,7 +231,8 @@ public final class HadoopUtil {
return fs.open(path.makeQualified(fs));
}
- public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
+ public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter,
+ Comparator<FileStatus> ordering, Configuration conf) throws IOException {
FileStatus[] statuses;
FileSystem fs = path.getFileSystem(conf);
if (filter == null) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java Sun Mar 24 20:05:50 2013
@@ -77,4 +77,4 @@ public final class LongPair implements C
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java Sun Mar 24 20:05:50 2013
@@ -76,4 +76,4 @@ public final class FileLineIterable impl
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/SamplingIterable.java Sun Mar 24 20:05:50 2013
@@ -42,4 +42,4 @@ public final class SamplingIterable<T> i
return samplingRate >= 1.0 ? delegate : new SamplingIterable<T>(delegate, samplingRate);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StringRecordIterator.java Sun Mar 24 20:05:50 2013
@@ -52,4 +52,4 @@ public class StringRecordIterator extend
return delegate;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java Sun Mar 24 20:05:50 2013
@@ -105,4 +105,4 @@ public final class SequenceFileIterator<
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java Sun Mar 24 20:05:50 2013
@@ -85,4 +85,4 @@ public final class SequenceFileValueIter
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Sun Mar 24 20:05:50 2013
@@ -27,8 +27,8 @@ public final class AnalyzerUtils {
}
/**
- * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in parameters
- * to your constructor, you will need to wrap it in an implementation that does not take any arguments
+ * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in
+ * parameters to your constructor, you will need to wrap it in an implementation that does not take any arguments
* @param analyzerClassName - Lucene Analyzer Name
* @return {@link Analyzer}
* @throws ClassNotFoundException - {@link ClassNotFoundException}
@@ -45,16 +45,16 @@ public final class AnalyzerUtils {
}
/**
- * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in parameters
- * to your constructor, you will need to wrap it in an implementation that does not take any arguments
+ * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in
+ * parameters to your constructor, you will need to wrap it in an implementation that does not take any arguments
* @param analyzerClass The Analyzer Class to instantiate
* @return {@link Analyzer}
*/
- public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass){
+ public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass) {
return createAnalyzer(analyzerClass, Version.LUCENE_41);
}
- public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version){
+ public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {
Analyzer analyzer;
if (analyzerClass == StandardAnalyzer.class) {
Class<?>[] params = new Class<?>[1];
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java Sun Mar 24 20:05:50 2013
@@ -26,8 +26,9 @@ import java.io.IOException;
/**
* Provide an Iterator for the tokens in a TokenStream.
*
- * Note, it is the responsibility of the instantiating class to properly consume the {@link org.apache.lucene.analysis.TokenStream}. See
- * the Lucene {@link org.apache.lucene.analysis.TokenStream} documentation for more information.
+ * Note, it is the responsibility of the instantiating class to properly consume the
+ * {@link org.apache.lucene.analysis.TokenStream}. See the Lucene {@link org.apache.lucene.analysis.TokenStream}
+ * documentation for more information.
*/
//TODO: consider using the char/byte arrays instead of strings, esp. when we upgrade to Lucene 4.0
public final class TokenStreamIterator extends AbstractIterator<String> {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/MergeVectorsCombiner.java Sun Mar 24 20:05:50 2013
@@ -31,4 +31,4 @@ public class MergeVectorsCombiner
throws IOException, InterruptedException {
ctx.write(key, VectorWritable.merge(vectors.iterator()));
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/mapreduce/TransposeMapper.java Sun Mar 24 20:05:50 2013
@@ -40,4 +40,4 @@ public class TransposeMapper extends Map
ctx.write(r, new VectorWritable(tmp));
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java Sun Mar 24 20:05:50 2013
@@ -117,4 +117,4 @@ public abstract class AbstractParameter<
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parameter.java Sun Mar 24 20:05:50 2013
@@ -59,4 +59,4 @@ public interface Parameter<T> extends Pa
/** @return value used if not set by consumer */
String defaultValue();
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Sun Mar 24 20:05:50 2013
@@ -195,7 +195,8 @@ public final class MahoutDriver {
programDriver.driver(argsList.toArray(new String[argsList.size()]));
if (log.isInfoEnabled()) {
- log.info("Program took {} ms (Minutes: {})", System.currentTimeMillis() - start, (System.currentTimeMillis() - start)/60000.0);
+ log.info("Program took {} ms (Minutes: {})", System.currentTimeMillis() - start,
+ (System.currentTimeMillis() - start) / 60000.0);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java Sun Mar 24 20:05:50 2013
@@ -23,4 +23,4 @@
* mapping is useful for values that must stay within a range but whose distribution is roughly exponential near
* geometric mean of the end-points. An identity mapping is also supplied.</p>
*/
-package org.apache.mahout.ep;
\ No newline at end of file
+package org.apache.mahout.ep;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Sun Mar 24 20:05:50 2013
@@ -55,7 +55,7 @@ public final class ParallelFPGrowthReduc
private int maxPerGroup;
private boolean useFP2;
- private static class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
+ private static final class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
private final Iterator<Pair<IntArrayList,Long>> innerIter;
private IteratorAdapter(Iterator<Pair<IntArrayList,Long>> transactionIter) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Sun Mar 24 20:05:50 2013
@@ -34,7 +34,7 @@ public class TransactionIterator<T> exte
private final int[] transactionBuffer;
private final Iterator<Pair<int[],Long>> delegate;
-
+
public TransactionIterator(Iterator<Pair<List<T>,Long>> transactions, final Map<T,Integer> attributeIdMapping) {
transactionBuffer = new int[attributeIdMapping.size()];
delegate = Iterators.transform(
@@ -43,8 +43,8 @@ public class TransactionIterator<T> exte
@Override
public Pair<int[],Long> apply(Pair<List<T>,Long> from) {
if (from == null) {
- return null;
- }
+ return null;
+ }
int index = 0;
for (T attribute : from.getFirst()) {
if (attributeIdMapping.containsKey(attribute)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Sun Mar 24 20:05:50 2013
@@ -373,4 +373,4 @@ public final class FPTree {
return sb.toString();
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java Sun Mar 24 20:05:50 2013
@@ -46,4 +46,4 @@
* increases the memory consumption but might improve speed until a certain point. This depends entirely on
* the dataset in question. A value of 5-10 is recommended for mining up to top 100 patterns for each feature.</p>
*/
-package org.apache.mahout.fpm.pfpgrowth;
\ No newline at end of file
+package org.apache.mahout.fpm.pfpgrowth;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java Sun Mar 24 20:05:50 2013
@@ -83,4 +83,4 @@ public class VarIntWritable implements W
value = Varint.readSignedVarInt(in);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java Sun Mar 24 20:05:50 2013
@@ -80,4 +80,4 @@ public class VarLongWritable implements
value = Varint.readSignedVarLong(in);
}
-}
\ No newline at end of file
+}