You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/05/10 13:30:15 UTC
svn commit: r1101411 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/
core/src/main/java/org/apache/mahout/clustering/lda/
core/src/test/java/org/apache/mahout/clustering/
core/src/test/java/org/apache/mahout/math/hadoop/decomposer/...
Author: srowen
Date: Tue May 10 11:30:14 2011
New Revision: 1101411
URL: http://svn.apache.org/viewvc?rev=1101411&view=rev
Log:
Style re-changes for MAHOUT-683, MAHOUT-682
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java Tue May 10 11:30:14 2011
@@ -18,7 +18,6 @@ package org.apache.mahout.clustering;
import java.io.IOException;
import java.util.Iterator;
-import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -38,7 +37,6 @@ import org.apache.mahout.math.VectorWrit
* with a set of models. To date, it has been tested with k-means and Dirichlet
* clustering. See examples DisplayKMeans and DisplayDirichlet which have been
* switched over to use it.
- *
*/
public class ClusterIterator {
@@ -53,15 +51,14 @@ public class ClusterIterator {
* iterations
*
* @param data
- * a List<Vector> of input vectors
+ * a {@code List<Vector>} of input vectors
* @param classifier
* a prior ClusterClassifier
* @param numIterations
* the int number of iterations to perform
* @return the posterior ClusterClassifier
*/
- public ClusterClassifier iterate(List<Vector> data,
- ClusterClassifier classifier, int numIterations) {
+ public ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
for (int iteration = 1; iteration <= numIterations; iteration++) {
for (Vector vector : data) {
// classification yields probabilities
@@ -69,8 +66,7 @@ public class ClusterIterator {
// policy selects weights for models given those probabilities
Vector weights = policy.select(probabilities);
// training causes all models to observe data
- for (Iterator<Vector.Element> it = weights.iterateNonZero(); it
- .hasNext();) {
+ for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
int index = it.next().index();
classifier.train(index, vector, weights.get(index));
}
@@ -97,8 +93,7 @@ public class ClusterIterator {
* the int number of iterations to perform
* @throws IOException
*/
- public void iterate(Path inPath, Path priorPath, Path outPath,
- int numIterations) throws IOException {
+ public void iterate(Path inPath, Path priorPath, Path outPath, int numIterations) throws IOException {
ClusterClassifier classifier = readClassifier(priorPath);
Configuration conf = new Configuration();
for (int iteration = 1; iteration <= numIterations; iteration++) {
@@ -126,8 +121,7 @@ public class ClusterIterator {
}
}
- private void writeClassifier(ClusterClassifier classifier, Path outPath, String k)
- throws IOException {
+ private static void writeClassifier(ClusterClassifier classifier, Path outPath, String k) throws IOException {
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(outPath.toUri(), config);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, outPath,
@@ -137,7 +131,7 @@ public class ClusterIterator {
writer.close();
}
- private ClusterClassifier readClassifier(Path inPath) throws IOException {
+ private static ClusterClassifier readClassifier(Path inPath) throws IOException {
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(inPath.toUri(), config);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPath, config);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADocumentTopicMapper.java Tue May 10 11:30:14 2011
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.clustering.lda;
import org.apache.hadoop.conf.Configuration;
@@ -8,9 +25,9 @@ import org.apache.mahout.math.VectorWrit
import java.io.IOException;
-public class LDADocumentTopicMapper extends Mapper<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
+public class LDADocumentTopicMapper
+ extends Mapper<WritableComparable<?>,VectorWritable,WritableComparable<?>,VectorWritable> {
- private LDAState state;
private LDAInference infer;
@Override
@@ -19,9 +36,8 @@ public class LDADocumentTopicMapper exte
Context context) throws IOException, InterruptedException {
Vector wordCounts = wordCountsWritable.get();
- LDAInference.InferredDocument doc;
try {
- doc = infer.infer(wordCounts);
+ LDAInference.InferredDocument doc = infer.infer(wordCounts);
context.write(key, new VectorWritable(doc.getGamma().normalize(1)));
} catch (ArrayIndexOutOfBoundsException e1) {
throw new IllegalStateException(
@@ -32,17 +48,12 @@ public class LDADocumentTopicMapper exte
}
public void configure(LDAState myState) {
- this.state = myState;
- this.infer = new LDAInference(state);
+ this.infer = new LDAInference(myState);
}
public void configure(Configuration job) {
- try {
- LDAState myState = LDADriver.createState(job);
- configure(myState);
- } catch (IOException e) {
- throw new IllegalStateException("Error creating LDA State!", e);
- }
+ LDAState myState = LDADriver.createState(job);
+ configure(myState);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java Tue May 10 11:30:14 2011
@@ -50,9 +50,9 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
-import java.util.List;
import java.util.Random;
/**
@@ -91,11 +91,11 @@ public final class LDADriver extends Abs
ToolRunner.run(new Configuration(), new LDADriver(), args);
}
- public static LDAState createState(Configuration job) throws IOException {
+ public static LDAState createState(Configuration job) {
return createState(job, false);
}
- public static LDAState createState(Configuration job, boolean empty) throws IOException {
+ public static LDAState createState(Configuration job, boolean empty) {
String statePath = job.get(STATE_IN_KEY);
int numTopics = Integer.parseInt(job.get(NUM_TOPICS_KEY));
int numWords = Integer.parseInt(job.get(NUM_WORDS_KEY));
@@ -224,7 +224,9 @@ public final class LDADriver extends Abs
conf.set(STATE_IN_KEY, stateIn.toString());
// point the output to a new directory per iteration
Path stateOut = new Path(output, "state-" + iteration);
- double ll = runSequential ? runIterationSequential(conf, input, stateOut) : runIteration(conf, input, stateIn, stateOut);
+ double ll = runSequential
+ ? runIterationSequential(conf, input, stateOut)
+ : runIteration(conf, input, stateIn, stateOut);
double relChange = (oldLL - ll) / oldLL;
// now point the input to the old output directory
@@ -239,11 +241,17 @@ public final class LDADriver extends Abs
if(runSequential) {
computeDocumentTopicProbabilitiesSequential(conf, input, new Path(output, "docTopics"));
} else {
- computeDocumentTopicProbabilities(conf, input, stateIn, new Path(output, "docTopics"), numTopics, numWords, topicSmoothing);
+ computeDocumentTopicProbabilities(conf,
+ input,
+ stateIn,
+ new Path(output, "docTopics"),
+ numTopics,
+ numWords,
+ topicSmoothing);
}
}
- private void writeInitialState(Path statePath, int numTopics, int numWords) throws IOException {
+ private static void writeInitialState(Path statePath, int numTopics, int numWords) throws IOException {
Configuration job = new Configuration();
FileSystem fs = statePath.getFileSystem(job);
@@ -272,7 +280,7 @@ public final class LDADriver extends Abs
}
}
- private void writeState(Configuration job, LDAState state, Path statePath) throws IOException {
+ private static void writeState(Configuration job, LDAState state, Path statePath) throws IOException {
FileSystem fs = statePath.getFileSystem(job);
DoubleWritable v = new DoubleWritable();
@@ -298,7 +306,7 @@ public final class LDADriver extends Abs
writer.close();
}
- private double findLL(Path statePath, Configuration job) throws IOException {
+ private static double findLL(Path statePath, Configuration job) throws IOException {
FileSystem fs = statePath.getFileSystem(job);
double ll = 0.0;
for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
@@ -317,20 +325,19 @@ public final class LDADriver extends Abs
return ll;
}
- private double runIterationSequential(Configuration conf, Path input, Path stateOut)
- throws IOException, InterruptedException {
+ private double runIterationSequential(Configuration conf, Path input, Path stateOut) throws IOException {
if(state == null) {
state = createState(conf);
}
if(trainingCorpus == null) {
Class<? extends Writable> keyClass = peekAtSequenceFileForKeyType(conf, input);
- List<Pair<Writable,VectorWritable>> corpus = new LinkedList<Pair<Writable, VectorWritable>>();
- for(FileStatus fileStatus : FileSystem.get(conf).globStatus(new Path(input, "part-*"))) {
+ Collection<Pair<Writable, VectorWritable>> corpus = new LinkedList<Pair<Writable, VectorWritable>>();
+ for (FileStatus fileStatus : FileSystem.get(conf).globStatus(new Path(input, "part-*"))) {
Path inputPart = fileStatus.getPath();
SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), inputPart, conf);
Writable key = ReflectionUtils.newInstance(keyClass, conf);
VectorWritable value = new VectorWritable();
- while(reader.next(key, value)) {
+ while (reader.next(key, value)) {
Writable nextKey = ReflectionUtils.newInstance(keyClass, conf);
VectorWritable nextValue = new VectorWritable();
corpus.add(new Pair<Writable,VectorWritable>(key, value));
@@ -340,11 +347,11 @@ public final class LDADriver extends Abs
}
trainingCorpus = corpus;
}
- if(inference == null) {
+ if (inference == null) {
inference = new LDAInference(state);
}
- double ll = 0;
newState = createState(conf, true);
+ double ll = 0.0;
for(Pair<Writable, VectorWritable> slice : trainingCorpus) {
LDAInference.InferredDocument doc;
Vector wordCounts = slice.getSecond().get();
@@ -386,7 +393,7 @@ public final class LDADriver extends Abs
* @param stateOut
* the directory pathname for output state
*/
- private double runIteration(Configuration conf,
+ private static double runIteration(Configuration conf,
Path input,
Path stateIn,
Path stateOut)
@@ -412,13 +419,13 @@ public final class LDADriver extends Abs
return findLL(stateOut, conf);
}
- private void computeDocumentTopicProbabilities(Configuration conf,
- Path input,
- Path stateIn,
- Path outputPath,
- int numTopics,
- int numWords,
- double topicSmoothing)
+ private static void computeDocumentTopicProbabilities(Configuration conf,
+ Path input,
+ Path stateIn,
+ Path outputPath,
+ int numTopics,
+ int numWords,
+ double topicSmoothing)
throws IOException, InterruptedException, ClassNotFoundException {
conf.set(STATE_IN_KEY, stateIn.toString());
conf.set(NUM_TOPICS_KEY, Integer.toString(numTopics));
@@ -437,25 +444,24 @@ public final class LDADriver extends Abs
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setJarByClass(LDADriver.class);
- if (job.waitForCompletion(true) == false) {
+ if (!job.waitForCompletion(true)) {
throw new InterruptedException("LDA failed to compute and output document topic probabilities with: "+ stateIn);
}
}
private void computeDocumentTopicProbabilitiesSequential(Configuration conf, Path input, Path outputPath)
- throws IOException, ClassNotFoundException {
+ throws IOException {
FileSystem fs = input.getFileSystem(conf);
Class<? extends Writable> keyClass = peekAtSequenceFileForKeyType(conf, input);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputPath, keyClass, VectorWritable.class);
Writable key = ReflectionUtils.newInstance(keyClass, conf);
- VectorWritable vw = new VectorWritable();
+ Writable vw = new VectorWritable();
for(Pair<Writable, VectorWritable> slice : trainingCorpus) {
- LDAInference.InferredDocument doc;
Vector wordCounts = slice.getSecond().get();
try {
- doc = inference.infer(wordCounts);
+ inference.infer(wordCounts);
} catch (ArrayIndexOutOfBoundsException e1) {
throw new IllegalStateException(
"This is probably because the --numWords argument is set too small. \n"
@@ -472,7 +478,7 @@ public final class LDADriver extends Abs
try {
SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), input, conf);
return (Class<? extends Writable>) reader.getKeyClass();
- } catch(IOException ioe) {
+ } catch (IOException ioe) {
return Text.class;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAWordTopicMapper.java Tue May 10 11:30:14 2011
@@ -23,7 +23,6 @@ import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.common.IntPairWritable;
@@ -94,12 +93,8 @@ public class LDAWordTopicMapper extends
}
public void configure(Configuration job) {
- try {
- LDAState myState = LDADriver.createState(job);
- configure(myState);
- } catch (IOException e) {
- throw new IllegalStateException("Error creating LDA State!", e);
- }
+ LDAState myState = LDADriver.createState(job);
+ configure(myState);
}
@Override
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java Tue May 10 11:30:14 2011
@@ -43,7 +43,7 @@ import org.junit.Test;
public final class TestClusterClassifier extends MahoutTestCase {
- private ClusterClassifier newDMClassifier() {
+ private static ClusterClassifier newDMClassifier() {
List<Cluster> models = new ArrayList<Cluster>();
DistanceMeasure measure = new ManhattanDistanceMeasure();
models.add(new DistanceMeasureCluster(new DenseVector(2).assign(1), 0,
@@ -51,11 +51,10 @@ public final class TestClusterClassifier
models.add(new DistanceMeasureCluster(new DenseVector(2), 1, measure));
models.add(new DistanceMeasureCluster(new DenseVector(2).assign(-1), 2,
measure));
- ClusterClassifier classifier = new ClusterClassifier(models);
- return classifier;
+ return new ClusterClassifier(models);
}
- private ClusterClassifier newClusterClassifier() {
+ private static ClusterClassifier newClusterClassifier() {
List<Cluster> models = new ArrayList<Cluster>();
DistanceMeasure measure = new ManhattanDistanceMeasure();
models.add(new org.apache.mahout.clustering.kmeans.Cluster(new DenseVector(
@@ -64,21 +63,19 @@ public final class TestClusterClassifier
2), 1, measure));
models.add(new org.apache.mahout.clustering.kmeans.Cluster(new DenseVector(
2).assign(-1), 2, measure));
- ClusterClassifier classifier = new ClusterClassifier(models);
- return classifier;
+ return new ClusterClassifier(models);
}
- private ClusterClassifier newSoftClusterClassifier() {
+ private static ClusterClassifier newSoftClusterClassifier() {
List<Cluster> models = new ArrayList<Cluster>();
DistanceMeasure measure = new ManhattanDistanceMeasure();
models.add(new SoftCluster(new DenseVector(2).assign(1), 0, measure));
models.add(new SoftCluster(new DenseVector(2), 1, measure));
models.add(new SoftCluster(new DenseVector(2).assign(-1), 2, measure));
- ClusterClassifier classifier = new ClusterClassifier(models);
- return classifier;
+ return new ClusterClassifier(models);
}
- private ClusterClassifier newGaussianClassifier() {
+ private static ClusterClassifier newGaussianClassifier() {
List<Cluster> models = new ArrayList<Cluster>();
models.add(new GaussianCluster(new DenseVector(2).assign(1),
new DenseVector(2).assign(1), 0));
@@ -86,8 +83,7 @@ public final class TestClusterClassifier
.assign(1), 1));
models.add(new GaussianCluster(new DenseVector(2).assign(-1),
new DenseVector(2).assign(1), 2));
- ClusterClassifier classifier = new ClusterClassifier(models);
- return classifier;
+ return new ClusterClassifier(models);
}
private ClusterClassifier writeAndRead(ClusterClassifier classifier)
@@ -99,8 +95,10 @@ public final class TestClusterClassifier
return readClassifier(config, path, fs);
}
- private void writeClassifier(ClusterClassifier classifier,
- Configuration config, Path path, FileSystem fs) throws IOException {
+ private static void writeClassifier(ClusterClassifier classifier,
+ Configuration config,
+ Path path,
+ FileSystem fs) throws IOException {
SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, path,
Text.class, ClusterClassifier.class);
Writable key = new Text("test");
@@ -108,11 +106,11 @@ public final class TestClusterClassifier
writer.close();
}
- private ClusterClassifier readClassifier(Configuration config, Path path,
- FileSystem fs) throws IOException {
- Writable key;
+ private static ClusterClassifier readClassifier(Configuration config,
+ Path path,
+ FileSystem fs) throws IOException {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
- key = new Text();
+ Writable key = new Text();
ClusterClassifier classifierOut = new ClusterClassifier();
reader.next(key, classifierOut);
reader.close();
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Tue May 10 11:30:14 2011
@@ -33,7 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import java.util.Arrays;
public final class TestDistributedLanczosSolverCLI extends MahoutTestCase {
@@ -110,8 +110,7 @@ public final class TestDistributedLanczo
Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
Matrix eigenVectors = new DenseMatrix(30, corpus.numCols());
- Configuration conf = new Configuration();
- List<Double> eigenvalues = new ArrayList<Double>();
+ Collection<Double> eigenvalues = new ArrayList<Double>();
output = getTestTempDirPath("output2");
tmp = getTestTempDirPath("tmp2");
@@ -128,8 +127,8 @@ public final class TestDistributedLanczo
new DistributedLanczosSolver().new DistributedLanczosSolverJob().run(args);
Path cleanEigenvectors2 = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
Matrix eigenVectors2 = new DenseMatrix(35, corpus.numCols());
- conf = new Configuration();
- List<Double> newEigenValues = new ArrayList<Double>();
+ Configuration conf = new Configuration();
+ Collection<Double> newEigenValues = new ArrayList<Double>();
int i = 0;
for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors, conf)) {
@@ -152,7 +151,7 @@ public final class TestDistributedLanczo
i++;
}
- List<Integer> oldEigensFound = new ArrayList<Integer>();
+ Collection<Integer> oldEigensFound = new ArrayList<Integer>();
for(int row = 0; row < eigenVectors.numRows(); row++) {
Vector oldEigen = eigenVectors.getRow(row);
if(oldEigen == null) {
@@ -170,7 +169,7 @@ public final class TestDistributedLanczo
}
assertEquals("the number of new eigenvectors", 30, i);
- List<Double> oldEigenValuesNotFound = new ArrayList<Double>();
+ Collection<Double> oldEigenValuesNotFound = new ArrayList<Double>();
for(double d : eigenvalues) {
boolean found = false;
for(double newD : newEigenValues) {
@@ -183,7 +182,7 @@ public final class TestDistributedLanczo
}
}
assertEquals("number of old eigenvalues not found: "
- + Arrays.toString(oldEigenValuesNotFound.toArray(new Double[0])),
+ + Arrays.toString(oldEigenValuesNotFound.toArray(new Double[oldEigenValuesNotFound.size()])),
0, oldEigenValuesNotFound.size());
assertEquals("did not find enough old eigenvectors", 16, oldEigensFound.size());
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Tue May 10 11:30:14 2011
@@ -300,25 +300,21 @@ public class DisplayClustering extends F
return (double) cluster.getNumPoints() / SAMPLE_DATA.size() > significance;
}
- protected static ClusterClassifier readClassifier(Configuration config, Path path)
- throws IOException {
- Writable key;
- SequenceFile.Reader reader = new SequenceFile.Reader(
- FileSystem.get(config), path, config);
- key = new Text();
- ClusterClassifier classifierOut = new ClusterClassifier();
- reader.next(key, classifierOut);
- reader.close();
- return classifierOut;
- }
+ protected static ClusterClassifier readClassifier(Configuration config, Path path) throws IOException {
+ SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(config), path, config);
+ Writable key = new Text();
+ ClusterClassifier classifierOut = new ClusterClassifier();
+ reader.next(key, classifierOut);
+ reader.close();
+ return classifierOut;
+ }
protected static void writeClassifier(ClusterClassifier classifier, Configuration config, Path path)
- throws IOException {
- SequenceFile.Writer writer = new SequenceFile.Writer(
- FileSystem.get(config), config, path, Text.class,
- ClusterClassifier.class);
- Writable key = new Text("test");
- writer.append(key, classifier);
- writer.close();
- }
+ throws IOException {
+ SequenceFile.Writer writer =
+ new SequenceFile.Writer(FileSystem.get(config), config, path, Text.class, ClusterClassifier.class);
+ Writable key = new Text("test");
+ writer.append(key, classifier);
+ writer.close();
+ }
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Tue May 10 11:30:14 2011
@@ -42,8 +42,7 @@ import org.slf4j.LoggerFactory;
public class DisplayDirichlet extends DisplayClustering {
- private static final Logger log = LoggerFactory
- .getLogger(DisplayDirichlet.class);
+ private static final Logger log = LoggerFactory.getLogger(DisplayDirichlet.class);
public DisplayDirichlet() {
initialize();
@@ -66,8 +65,7 @@ public class DisplayDirichlet extends Di
for (int k = 0; k < r.length; k++) {
Cluster model = r[k];
if (model.count() > significant) {
- models.append('m').append(k).append(model.asFormatString(null))
- .append(", ");
+ models.append('m').append(k).append(model.asFormatString(null)).append(", ");
}
}
models.append('\n');
@@ -76,22 +74,23 @@ public class DisplayDirichlet extends Di
log.info(models.toString());
}
- protected static void generateResults(
- ModelDistribution<VectorWritable> modelDist, int numClusters,
- int numIterations, double alpha0, int thin, int burnin)
- throws IOException {
+ protected static void generateResults(ModelDistribution<VectorWritable> modelDist,
+ int numClusters,
+ int numIterations,
+ double alpha0,
+ int thin,
+ int burnin) throws IOException {
boolean runClusterer = false;
if (runClusterer) {
- runSequentialDirichletClusterer(modelDist, numClusters, numIterations, alpha0,
- thin, burnin);
+ runSequentialDirichletClusterer(modelDist, numClusters, numIterations, alpha0, thin, burnin);
} else {
runSequentialDirichletClassifier(modelDist, numClusters, numIterations);
}
}
- private static void runSequentialDirichletClassifier(
- ModelDistribution<VectorWritable> modelDist, int numClusters,
- int numIterations) throws IOException {
+ private static void runSequentialDirichletClassifier(ModelDistribution<VectorWritable> modelDist,
+ int numClusters,
+ int numIterations) throws IOException {
List<Cluster> models = new ArrayList<Cluster>();
for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(numClusters)) {
models.add((Cluster) cluster);
@@ -103,13 +102,10 @@ public class DisplayDirichlet extends Di
Configuration conf = new Configuration();
writeClassifier(prior, conf, priorClassifier);
- ClusteringPolicy policy = new DirichletClusteringPolicy(numClusters,
- numIterations);
- new ClusterIterator(policy).iterate(samples, priorClassifier, output,
- numIterations);
+ ClusteringPolicy policy = new DirichletClusteringPolicy(numClusters, numIterations);
+ new ClusterIterator(policy).iterate(samples, priorClassifier, output, numIterations);
for (int i = 1; i <= numIterations; i++) {
- ClusterClassifier posterior = readClassifier(conf, new Path(output,
- "classifier-" + i));
+ ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
List<Cluster> clusters = new ArrayList<Cluster>();
for (Cluster cluster : posterior.getModels()) {
if (isSignificant(cluster)) {
@@ -120,11 +116,13 @@ public class DisplayDirichlet extends Di
}
}
- private static void runSequentialDirichletClusterer(
- ModelDistribution<VectorWritable> modelDist, int numClusters,
- int numIterations, double alpha0, int thin, int burnin) {
- DirichletClusterer dc = new DirichletClusterer(SAMPLE_DATA, modelDist,
- alpha0, numClusters, thin, burnin);
+ private static void runSequentialDirichletClusterer(ModelDistribution<VectorWritable> modelDist,
+ int numClusters,
+ int numIterations,
+ double alpha0,
+ int thin,
+ int burnin) {
+ DirichletClusterer dc = new DirichletClusterer(SAMPLE_DATA, modelDist, alpha0, numClusters, thin, burnin);
List<Cluster[]> result = dc.cluster(numIterations);
printModels(result, burnin);
for (Cluster[] models : result) {
@@ -140,9 +138,7 @@ public class DisplayDirichlet extends Di
public static void main(String[] args) throws Exception {
VectorWritable modelPrototype = new VectorWritable(new DenseVector(2));
- ModelDistribution<VectorWritable> modelDist = new GaussianClusterDistribution(
- modelPrototype);
-
+ ModelDistribution<VectorWritable> modelDist = new GaussianClusterDistribution(modelPrototype);
RandomUtils.useTestSeed();
generateSamples();
int numIterations = 20;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Tue May 10 11:30:14 2011
@@ -21,6 +21,7 @@ import java.awt.Graphics;
import java.awt.Graphics2D;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -59,8 +60,6 @@ class DisplayFuzzyKMeans extends Display
Path samples = new Path("samples");
Path output = new Path("output");
- int numClusters = 3;
- int maxIterations = 10;
Configuration conf = new Configuration();
HadoopUtil.delete(conf, samples);
HadoopUtil.delete(conf, output);
@@ -68,20 +67,23 @@ class DisplayFuzzyKMeans extends Display
DisplayClustering.generateSamples();
writeSampleData(samples);
boolean runClusterer = false;
+ int maxIterations = 10;
if (runClusterer) {
- runSequentialFuzzyKClusterer(conf, samples, output, measure, numClusters,
- maxIterations);
+ runSequentialFuzzyKClusterer(conf, samples, output, measure, maxIterations);
} else {
- runSequentialFuzzyKClassifier(conf, samples, output, measure,
- numClusters, maxIterations);
+ int numClusters = 3;
+ runSequentialFuzzyKClassifier(conf, samples, output, measure, numClusters, maxIterations);
}
new DisplayFuzzyKMeans();
}
private static void runSequentialFuzzyKClassifier(Configuration conf,
- Path samples, Path output, DistanceMeasure measure, int numClusters,
- int maxIterations) throws IOException {
- List<Vector> points = new ArrayList<Vector>();
+ Path samples,
+ Path output,
+ DistanceMeasure measure,
+ int numClusters,
+ int maxIterations) throws IOException {
+ Collection<Vector> points = new ArrayList<Vector>();
for (int i = 0; i < numClusters; i++) {
points.add(SAMPLE_DATA.get(i).get());
}
@@ -95,18 +97,19 @@ class DisplayFuzzyKMeans extends Display
writeClassifier(prior, conf, priorClassifier);
ClusteringPolicy policy = new FuzzyKMeansClusteringPolicy();
- new ClusterIterator(policy).iterate(samples, priorClassifier, output,
- maxIterations);
+ new ClusterIterator(policy).iterate(samples, priorClassifier, output, maxIterations);
for (int i = 1; i <= maxIterations; i++) {
- ClusterClassifier posterior = readClassifier(conf, new Path(output,
- "classifier-" + i));
+ ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
CLUSTERS.add(posterior.getModels());
}
}
- private static void runSequentialFuzzyKClusterer(Configuration conf, Path samples,
- Path output, DistanceMeasure measure, int numClusters, int maxIterations)
- throws IOException, ClassNotFoundException, InterruptedException {
+ private static void runSequentialFuzzyKClusterer(Configuration conf,
+ Path samples,
+ Path output,
+ DistanceMeasure measure,
+ int maxIterations)
+ throws IOException, ClassNotFoundException, InterruptedException {
Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
output, "clusters-0"), 3, measure);
double threshold = 0.001;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1101411&r1=1101410&r2=1101411&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Tue May 10 11:30:14 2011
@@ -21,6 +21,7 @@ import java.awt.Graphics;
import java.awt.Graphics2D;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -39,13 +40,10 @@ import org.apache.mahout.common.distance
import org.apache.mahout.math.Vector;
class DisplayKMeans extends DisplayClustering {
-
- // static List<List<Cluster>> result;
-
+
DisplayKMeans() {
initialize();
- this.setTitle("k-Means Clusters (>" + (int) (significance * 100)
- + "% of population)");
+ this.setTitle("k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
}
public static void main(String[] args) throws Exception {
@@ -53,8 +51,6 @@ class DisplayKMeans extends DisplayClust
Path samples = new Path("samples");
Path output = new Path("output");
Configuration conf = new Configuration();
- int numClusters = 3;
- int maxIterations = 10;
HadoopUtil.delete(conf, samples);
HadoopUtil.delete(conf, output);
@@ -63,19 +59,21 @@ class DisplayKMeans extends DisplayClust
writeSampleData(samples);
boolean runClusterer = false;
if (runClusterer) {
- runSequentialKMeansClusterer(conf, samples, output, measure, numClusters,
- maxIterations);
+ int numClusters = 3;
+ runSequentialKMeansClusterer(conf, samples, output, measure, numClusters);
} else {
- runSequentialKMeansClassifier(conf, samples, output, measure,
- numClusters, maxIterations);
+ int maxIterations = 10;
+ runSequentialKMeansClassifier(conf, samples, output, measure, maxIterations);
}
new DisplayKMeans();
}
private static void runSequentialKMeansClassifier(Configuration conf,
- Path samples, Path output, DistanceMeasure measure, int numClusters,
- int maxIterations) throws IOException {
- List<Vector> points = new ArrayList<Vector>();
+ Path samples,
+ Path output,
+ DistanceMeasure measure,
+ int numClusters) throws IOException {
+ Collection<Vector> points = new ArrayList<Vector>();
for (int i = 0; i < numClusters; i++) {
points.add(SAMPLE_DATA.get(i).get());
}
@@ -91,18 +89,19 @@ class DisplayKMeans extends DisplayClust
int maxIter = 10;
ClusteringPolicy policy = new KMeansClusteringPolicy();
- new ClusterIterator(policy).iterate(samples, priorClassifier, output,
- maxIter);
+ new ClusterIterator(policy).iterate(samples, priorClassifier, output, maxIter);
for (int i = 1; i <= maxIter; i++) {
- ClusterClassifier posterior = readClassifier(conf, new Path(output,
- "classifier-" + i));
+ ClusterClassifier posterior = readClassifier(conf, new Path(output, "classifier-" + i));
CLUSTERS.add(posterior.getModels());
}
}
- private static void runSequentialKMeansClusterer(Configuration conf, Path samples,
- Path output, DistanceMeasure measure, int numClusters, int maxIterations)
- throws IOException, InterruptedException, ClassNotFoundException {
+ private static void runSequentialKMeansClusterer(Configuration conf,
+ Path samples,
+ Path output,
+ DistanceMeasure measure,
+ int maxIterations)
+ throws IOException, InterruptedException, ClassNotFoundException {
Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
output, "clusters-0"), 3, measure);
double distanceThreshold = 0.001;