You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/24 21:05:55 UTC
svn commit: r1460431 [3/3] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/common/
core/src/main/java/org/apache/mahout/cf/taste/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java Sun Mar 24 20:05:50 2013
@@ -218,11 +218,11 @@ public class HdfsBackedLanczosState exte
Vector norms = fetchVector(new Path(baseDir, "norms"), 0);
Vector projections = fetchVector(new Path(baseDir, "projections"), 0);
if (norms != null && projections != null) {
- int i=0;
- while (i<projections.size()-1) {
+ int i = 0;
+ while (i < projections.size() - 1) {
diagonalMatrix.set(i, i, projections.get(i));
- diagonalMatrix.set(i, i+1, norms.get(i));
- diagonalMatrix.set(i+1, i, norms.get(i));
+ diagonalMatrix.set(i, i + 1, norms.get(i));
+ diagonalMatrix.set(i + 1, i, norms.get(i));
i++;
}
diagonalMatrix.set(i, i, projections.get(i));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Sun Mar 24 20:05:50 2013
@@ -406,7 +406,7 @@ public class RowSimilarityJob extends Ab
protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
throws IOException, InterruptedException {
Vector similarities = similaritiesWritable.get();
- // For performance reasons, the creation of transposedPartial is moved out of the while loop and it is reused inside
+ // For performance, the creation of transposedPartial is moved out of the while loop and it is reused inside
Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1);
TopK<Vector.Element> topKQueue = new TopK<Vector.Element>(maxSimilaritiesPerRow, Vectors.BY_VALUE);
Iterator<Vector.Element> nonZeroElements = similarities.iterateNonZero();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/BasicStats.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/BasicStats.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/BasicStats.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/BasicStats.java Sun Mar 24 20:05:50 2013
@@ -52,10 +52,10 @@ public final class BasicStats {
Configuration baseConf) throws IOException, InterruptedException,
ClassNotFoundException {
- VarianceTotals varianceTotals = computeVarianceTotals(input, output, baseConf);
+ VarianceTotals varianceTotals = computeVarianceTotals(input, output, baseConf);
return varianceTotals.computeVariance();
}
-
+
/**
* Calculate the variance by a predefined mean of values stored as
*
@@ -69,10 +69,10 @@ public final class BasicStats {
Configuration baseConf) throws IOException, InterruptedException,
ClassNotFoundException {
- VarianceTotals varianceTotals = computeVarianceTotals(input, output, baseConf);
+ VarianceTotals varianceTotals = computeVarianceTotals(input, output, baseConf);
return varianceTotals.computeVarianceForGivenMean(mean);
}
-
+
private static VarianceTotals computeVarianceTotals(Path input, Path output,
Configuration baseConf) throws IOException, InterruptedException,
ClassNotFoundException {
@@ -80,9 +80,10 @@ public final class BasicStats {
conf.set("io.serializations",
"org.apache.hadoop.io.serializer.JavaSerialization,"
+ "org.apache.hadoop.io.serializer.WritableSerialization");
- Job job = HadoopUtil.prepareJob(input, output, SequenceFileInputFormat.class, StandardDeviationCalculatorMapper.class,
- IntWritable.class, DoubleWritable.class, StandardDeviationCalculatorReducer.class,
- IntWritable.class, DoubleWritable.class, SequenceFileOutputFormat.class, conf);
+ Job job = HadoopUtil.prepareJob(input, output, SequenceFileInputFormat.class,
+ StandardDeviationCalculatorMapper.class, IntWritable.class, DoubleWritable.class,
+ StandardDeviationCalculatorReducer.class, IntWritable.class, DoubleWritable.class,
+ SequenceFileOutputFormat.class, conf);
HadoopUtil.delete(conf, output);
job.setCombinerClass(StandardDeviationCalculatorReducer.class);
boolean succeeded = job.waitForCompletion(true);
@@ -109,12 +110,12 @@ public final class BasicStats {
sum += ((DoubleWritable) record.getSecond()).get();
}
}
-
+
VarianceTotals varianceTotals = new VarianceTotals();
varianceTotals.setSum(sum);
varianceTotals.setSumOfSquares(sumOfSquares);
varianceTotals.setTotalCount(totalCount);
-
+
return varianceTotals;
}
@@ -131,7 +132,7 @@ public final class BasicStats {
ClassNotFoundException {
return Math.sqrt(variance(input, output, baseConf));
}
-
+
/**
* Calculate the standard deviation given a predefined mean
*
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java Sun Mar 24 20:05:50 2013
@@ -52,7 +52,7 @@ public final class VarianceTotals {
}
public double computeMean() {
- return sum/totalCount;
+ return sum / totalCount;
}
public double computeVariance() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java Sun Mar 24 20:05:50 2013
@@ -133,9 +133,8 @@ public class Omega {
public Double call() throws Exception {
double result = 0.0;
if (v.isDense()) {
- for (int k = 0; k < v.size(); k++)
+ for (int k = 0; k < v.size(); k++) {
// it's ok, this is reentrant
- {
result += getQuick(k, index) * v.getQuick(k);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Sun Mar 24 20:05:50 2013
@@ -514,7 +514,7 @@ public final class SSVDSolver {
* we currently use a 3rd party in-core eigensolver. So we need just a
* dense array representation for it.
*/
- DenseMatrix bbtSquare = new DenseMatrix(k+p,k+p);
+ DenseMatrix bbtSquare = new DenseMatrix(k + p, k + p);
for (int i = 0; i < k + p; i++) {
for (int j = i; j < k + p; j++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SplitPartitionedWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SplitPartitionedWritable.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SplitPartitionedWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SplitPartitionedWritable.java Sun Mar 24 20:05:50 2013
@@ -147,4 +147,4 @@ public class SplitPartitionedWritable im
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java Sun Mar 24 20:05:50 2013
@@ -156,13 +156,14 @@ public class UJob {
SSVDSolver.OutputScalingEnum.valueOf(context.getConfiguration()
.get(PROP_OUTPUT_SCALING));
switch (outputScaling) {
- case SIGMA:
- sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
- break;
- case HALFSIGMA:
- sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
- sValues.assign(Functions.SQRT);
- break;
+ case SIGMA:
+ sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
+ break;
+ case HALFSIGMA:
+ sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
+ sValues.assign(Functions.SQRT);
+ break;
+ default:
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java Sun Mar 24 20:05:50 2013
@@ -114,13 +114,14 @@ public class VJob {
SSVDSolver.OutputScalingEnum.valueOf(context.getConfiguration()
.get(PROP_OUTPUT_SCALING));
switch (outputScaling) {
- case SIGMA:
- sValues.assign(1.0);
- break;
- case HALFSIGMA:
- sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
- sValues.assign(Functions.SQRT);
- break;
+ case SIGMA:
+ sValues.assign(1.0);
+ break;
+ case HALFSIGMA:
+ sValues = SSVDHelper.loadVector(sigmaPath, context.getConfiguration());
+ sValues.assign(Functions.SQRT);
+ break;
+ default:
}
/*
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java Sun Mar 24 20:05:50 2013
@@ -74,6 +74,6 @@ public class Sampler {
private int sample(double[] sampler) {
int index = Arrays.binarySearch(sampler, random.nextDouble());
- return index < 0 ? -(index+1) : index;
+ return index < 0 ? -(index + 1) : index;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java Sun Mar 24 20:05:50 2013
@@ -87,12 +87,13 @@ public final class EncodedVectorsFromSeq
String encoderClass = LuceneTextValueEncoder.class.getName();
if (hasOption("encoderClass")) {
encoderClass = getOption("encoderClass");
- ClassUtils.instantiateAs(encoderClass, FeatureVectorEncoder.class, new Class[]{String.class}, new Object[]{encoderName}); //try instantiating it
+ ClassUtils.instantiateAs(encoderClass, FeatureVectorEncoder.class, new Class[] { String.class },
+ new Object[] { encoderName }); //try instantiating it
}
SimpleTextEncodingVectorizer vectorizer = new SimpleTextEncodingVectorizer();
- VectorizerConfig config = new VectorizerConfig(conf, analyzerClass.getName(), encoderClass, encoderName, sequentialAccessOutput,
- namedVectors, cardinality);
+ VectorizerConfig config = new VectorizerConfig(conf, analyzerClass.getName(), encoderClass, encoderName,
+ sequentialAccessOutput, namedVectors, cardinality);
vectorizer.createVectors(input, output, config);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Sun Mar 24 20:05:50 2013
@@ -264,9 +264,9 @@ public final class SparseVectorsFromSequ
if (cmdLine.hasOption(namedVectorOpt)) {
namedVectors = true;
}
- boolean shouldPrune = maxDFSigma >=0.0;
+ boolean shouldPrune = maxDFSigma >= 0.0;
String tfDirName = shouldPrune
- ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER+"-toprune"
+ ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune"
: DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER;
if (processIdf) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java Sun Mar 24 20:05:50 2013
@@ -99,7 +99,7 @@ public class LuceneTextValueEncoder exte
// do nothing
}
}
- //GSI: TODO: we really need a way to make sure we call the TokenStream workflow here (i.e. end and close when we are done)
+ //GSI: TODO: we really need a way to make sure we call the TokenStream workflow here (i.e. end and close when done)
private static final class LuceneTokenIterable implements Iterable<String> {
private boolean firstTime = true;
private final TokenStream tokenStream;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Sun Mar 24 20:05:50 2013
@@ -32,6 +32,7 @@ import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -197,7 +198,7 @@ public class DisplayClustering extends F
Path clusteredPointsPath = new Path(data, "clusteredPoints");
Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
- HashMap<Integer,Color> colors = new HashMap<Integer,Color>();
+ Map<Integer,Color> colors = new HashMap<Integer,Color>();
int point = 0;
for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileIterable<IntWritable,WeightedVectorWritable>(
inputPath, new Configuration())) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Sun Mar 24 20:05:50 2013
@@ -46,20 +46,18 @@ public class DisplayDirichlet extends Di
public DisplayDirichlet() {
initialize();
- this.setTitle("Dirichlet Process Clusters - Normal Distribution (>" + (int) (significance * 100)
- + "% of population)");
+ setTitle("Dirichlet Process Clusters - Normal Distribution (>" + (int) (significance * 100) + "% of population)");
}
-
- // Override the paint() method
+
@Override
public void paint(Graphics g) {
plotSampleData((Graphics2D) g);
plotClusters((Graphics2D) g);
}
- protected static void generateResults(Path input, Path output,
- ModelDistribution<VectorWritable> modelDist, int numClusters, int numIterations, double alpha0, int thin, int burnin) throws IOException, ClassNotFoundException,
- InterruptedException {
+ protected static void generateResults(Path input, Path output, ModelDistribution<VectorWritable> modelDist,
+ int numClusters, int numIterations, double alpha0, int thin, int burnin) throws IOException,
+ ClassNotFoundException, InterruptedException {
boolean runClusterer = true;
if (runClusterer) {
runSequentialDirichletClusterer(input, output, modelDist, numClusters, numIterations, alpha0);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Sun Mar 24 20:05:50 2013
@@ -102,7 +102,8 @@ public class DisplayFuzzyKMeans extends
ClassNotFoundException, InterruptedException {
Path clustersIn = new Path(output, "random-seeds");
RandomSeedGenerator.buildRandom(conf, samples, clustersIn, 3, measure);
- FuzzyKMeansDriver.run(samples, clustersIn, output, measure, threshold, maxIterations, m, true, true, threshold, true);
+ FuzzyKMeansDriver.run(samples, clustersIn, output, measure, threshold, maxIterations, m, true, true, threshold,
+ true);
loadClustersWritable(output);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java Sun Mar 24 20:05:50 2013
@@ -38,7 +38,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.function.Functions;
-public class DisplayMeanShift extends DisplayClustering {
+public final class DisplayMeanShift extends DisplayClustering {
private static double t1;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java Sun Mar 24 20:05:50 2013
@@ -96,12 +96,12 @@ public class DisplayMinHash extends Disp
private static final int SYMBOLS_FONT_SIZE = 6;
- private static final Map<String, List<Vector>> clusters = new HashMap<String, List<Vector>>();
+ private static final Map<String, List<Vector>> CLUSTERS = new HashMap<String, List<Vector>>();
private static Iterator<Entry<String, List<Vector>>> currentCluster;
private static List<Vector> currentClusterPoints;
private static int updatePeriodTime;
- private static long lastUpdateTime = 0;
- private static boolean isSlideShowOnHold = false;
+ private static long lastUpdateTime;
+ private static boolean isSlideShowOnHold;
private PlotType plotType = PlotType.POINTS;
@@ -149,12 +149,14 @@ public class DisplayMinHash extends Disp
case POINTS:
plotPoints(g2);
break;
+ default:
+ throw new IllegalStateException("Unknown plot type: " + plotType);
}
}
private static void plotLines(Graphics2D g2) {
Random rand = RandomUtils.getRandom();
- for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
+ for (Map.Entry<String, List<Vector>> entry : CLUSTERS.entrySet()) {
List<Vector> vecs = entry.getValue();
g2.setColor(new Color(rand.nextInt()));
@@ -179,7 +181,7 @@ public class DisplayMinHash extends Disp
private static void plotSymbols(Graphics2D g2) {
char symbol = 0;
Random rand = RandomUtils.getRandom();
- for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
+ for (Map.Entry<String, List<Vector>> entry : CLUSTERS.entrySet()) {
List<Vector> vecs = entry.getValue();
g2.setColor(new Color(rand.nextInt()));
@@ -193,7 +195,7 @@ public class DisplayMinHash extends Disp
private static void plotPoints(Graphics2D g2) {
if (currentCluster == null || !currentCluster.hasNext()) {
- currentCluster = clusters.entrySet().iterator();
+ currentCluster = CLUSTERS.entrySet().iterator();
}
if (System.currentTimeMillis() - lastUpdateTime > updatePeriodTime) {
@@ -328,7 +330,7 @@ public class DisplayMinHash extends Disp
private static void logClusters() {
int i = 0;
- for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
+ for (Map.Entry<String, List<Vector>> entry : CLUSTERS.entrySet()) {
StringBuilder logStr = new StringBuilder();
logStr.append("Cluster N:").append(++i).append(": ");
List<Vector> vecs = entry.getValue();
@@ -349,24 +351,21 @@ public class DisplayMinHash extends Disp
while (iterator.hasNext()) {
Pair<Text, VectorWritable> next = iterator.next();
String key = next.getFirst().toString();
- List<Vector> list = clusters.get(key);
+ List<Vector> list = CLUSTERS.get(key);
if (list == null) {
list = Lists.newArrayList();
- clusters.put(key, list);
+ CLUSTERS.put(key, list);
}
list.add(next.getSecond().get());
}
- log.info("Loaded: {} clusters", clusters.size());
+ log.info("Loaded: {} clusters", CLUSTERS.size());
}
- private static void runMinHash(Configuration conf, Path samples, Path output)
- throws Exception {
+ private static void runMinHash(Configuration conf, Path samples, Path output) throws Exception {
ToolRunner.run(conf, new MinHashDriver(), new String[] { "--input", samples.toString(),
- "--hashType", HashFactory.HashType.MURMUR3.toString(), "--output",
- output.toString(), "--minVectorSize", "1", "--debugOutput"
-
+ "--hashType", HashFactory.HashType.MURMUR3.toString(), "--output", output.toString(),
+ "--minVectorSize", "1", "--debugOutput"
});
-
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java Sun Mar 24 20:05:50 2013
@@ -43,7 +43,7 @@ public class DisplaySpectralKMeans exten
DisplaySpectralKMeans() {
initialize();
- this.setTitle("Spectral k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
+ setTitle("Spectral k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
}
public static void main(String[] args) throws Exception {
@@ -63,8 +63,9 @@ public class DisplaySpectralKMeans exten
if (!fs.exists(output)) {
fs.mkdirs(output);
}
- Writer writer = Files.newWriter(new File(affinities.toString()), Charsets.UTF_8);
+ Writer writer = null;
try {
+ writer = Files.newWriter(new File(affinities.toString()), Charsets.UTF_8);
for (int i = 0; i < SAMPLE_DATA.size(); i++) {
for (int j = 0; j < SAMPLE_DATA.size(); j++) {
writer.write(i + "," + j + ',' + measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()) + '\n');
@@ -75,11 +76,11 @@ public class DisplaySpectralKMeans exten
}
int maxIter = 10;
double convergenceDelta = 0.001;
- SpectralKMeansDriver.run(new Configuration(), affinities, output, SAMPLE_DATA.size(), 3, measure, convergenceDelta, maxIter, tempDir, false);
+ SpectralKMeansDriver.run(new Configuration(), affinities, output, SAMPLE_DATA.size(), 3, measure, convergenceDelta,
+ maxIter, tempDir, false);
new DisplaySpectralKMeans();
}
- // Override the paint() method
@Override
public void paint(Graphics g) {
plotClusteredSampleData((Graphics2D) g, new Path(OUTPUT));
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java Sun Mar 24 20:05:50 2013
@@ -95,8 +95,8 @@ public final class LastfmClusterEvaluato
long similarListeners = 0;
long allListeners = 0;
int clustersProcessed = 0;
- for (Pair<Text,VectorWritable> record :
- new SequenceFileIterable<Text,VectorWritable>(clusterFile, true, conf)) {
+ for (Pair<Text,VectorWritable> record
+ : new SequenceFileIterable<Text,VectorWritable>(clusterFile, true, conf)) {
Text cluster = record.getFirst();
VectorWritable point = record.getSecond();
if (!cluster.equals(prevCluster)) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Sun Mar 24 20:05:50 2013
@@ -79,7 +79,7 @@ public final class Job extends AbstractJ
* the canopy T2 threshold
*/
private static void run(Path input, Path output, DistanceMeasure measure,
- double t1, double t2) throws Exception{
+ double t1, double t2) throws Exception {
Path directoryContainingConvertedInput = new Path(output,
DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput,
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Sun Mar 24 20:05:50 2013
@@ -43,7 +43,8 @@ public final class Job extends AbstractJ
private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
- private Job() {}
+ private Job() {
+ }
public static void main(String[] args) throws Exception {
if (args.length > 0) {
@@ -113,7 +114,8 @@ public final class Job extends AbstractJ
boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
double alpha0 = Double.parseDouble(getOption(DirichletDriver.ALPHA_OPTION));
- DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure, 60);
+ DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure,
+ 60);
run(input, output, description, numModels, maxIterations, alpha0, emitMostLikely, threshold);
return 0;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Sun Mar 24 20:05:50 2013
@@ -47,7 +47,8 @@ public final class Job extends AbstractJ
private static final String M_OPTION = FuzzyKMeansDriver.M_OPTION;
- private Job() {}
+ private Job() {
+ }
public static void main(String[] args) throws Exception {
if (args.length > 0) {
@@ -135,8 +136,8 @@ public final class Job extends AbstractJ
CanopyDriver
.run(new Configuration(), directoryContainingConvertedInput, canopyOutput, measure, t1, t2, false, 0.0, false);
log.info("Running FuzzyKMeans");
- FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(canopyOutput, "clusters-0-final"), output, measure,
- convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
+ FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(canopyOutput, "clusters-0-final"), output,
+ measure, convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
"clusteredPoints"));
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sun Mar 24 20:05:50 2013
@@ -45,7 +45,8 @@ public final class Job extends AbstractJ
private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
- private Job() {}
+ private Job() {
+ }
public static void main(String[] args) throws Exception {
if (args.length > 0) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=1460431&r1=1460430&r2=1460431&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Sun Mar 24 20:05:50 2013
@@ -43,7 +43,8 @@ public final class Job extends AbstractJ
private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
- private Job() {}
+ private Job() {
+ }
public static void main(String[] args) throws Exception {
if (args.length > 0) {
@@ -54,14 +55,13 @@ public final class Job extends AbstractJ
Path output = new Path("output");
Configuration conf = new Configuration();
HadoopUtil.delete(conf, output);
- run(conf, new Path("testdata"), output,
- new EuclideanDistanceMeasure(), new TriangularKernelProfile(), 47.6,
- 1, 0.5, 10);
+ run(conf, new Path("testdata"), output, new EuclideanDistanceMeasure(), new TriangularKernelProfile(), 47.6, 1,
+ 0.5, 10);
}
}
@Override
- public int run(String[] args) throws Exception{
+ public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.convergenceOption().create());
@@ -133,7 +133,7 @@ public final class Job extends AbstractJ
double t2,
double convergenceDelta,
int maxIterations)
- throws Exception{
+ throws Exception {
Path directoryContainingConvertedInput = new Path(output,
DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput);