You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/09/26 16:31:54 UTC
svn commit: r1001437 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/ep/
core/src/main/java/org/apache/mahout/fpm/pfpgrowth/
core/src/main/java/org/apache/mahout/vectors/
core/src/test/java/org/apache/mahout/classifier/sgd/ core/src/test/java/...
Author: srowen
Date: Sun Sep 26 14:31:53 2010
New Revision: 1001437
URL: http://svn.apache.org/viewvc?rev=1001437&view=rev
Log:
Another run at checkstyle/PMD
Removed:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/MurmurHash.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectors/TextValueEncoderTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectors/WordLikeValueEncoderTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/PrintResourceOrFile.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/stat/GammaTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
mahout/trunk/taste-web/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitioner.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java Sun Sep 26 14:31:53 2010
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.ep;
import com.google.common.collect.Lists;
@@ -165,7 +182,7 @@ public class EvolutionaryProcess<T exten
return population;
}
- public void close() throws EarlyTerminationException {
+ public void close() {
List<Runnable> remainingTasks = pool.shutdownNow();
if (remainingTasks.size() > 0) {
throw new EarlyTerminationException("Had to forcefully shut down " + remainingTasks.size() + " tasks");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java Sun Sep 26 14:31:53 2010
@@ -25,6 +25,9 @@ import org.apache.mahout.math.function.U
*/
public abstract class Mapping implements UnaryFunction {
+ private Mapping() {
+ }
+
public static final class SoftLimit extends Mapping {
private double min;
private double max;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Sun Sep 26 14:31:53 2010
@@ -247,12 +247,12 @@ public final class TransactionTree imple
public Map<Integer,MutableLong> generateFList() {
Map<Integer,MutableLong> frequencyList = new HashMap<Integer,MutableLong>();
Iterator<Pair<List<Integer>,Long>> it = getIterator();
- int items = 0;
- int count = 0;
+ //int items = 0;
+ //int count = 0;
while (it.hasNext()) {
Pair<List<Integer>,Long> p = it.next();
- items += p.getFirst().size();
- count++;
+ //items += p.getFirst().size();
+ //count++;
for (Integer i : p.getFirst()) {
if (!frequencyList.containsKey(i)) {
frequencyList.put(i, new MutableLong(0));
@@ -398,7 +398,8 @@ public final class TransactionTree imple
vLong.write(out);
vInt.set(childCount[i]);
vInt.write(out);
- for (int j = 0, k = childCount[i]; j < k; j++) {
+ int max = childCount[i];
+ for (int j = 0; j < max; j++) {
vInt.set(nodeChildren[i][j]);
vInt.write(out);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/MurmurHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/MurmurHash.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/MurmurHash.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/MurmurHash.java Sun Sep 26 14:31:53 2010
@@ -27,7 +27,8 @@ import java.nio.ByteOrder;
* <p>The C version of MurmurHash 2.0 found at that site was ported
* to Java by Andrzej Bialecki (ab at getopt org).</p>
*/
-public class MurmurHash {
+public final class MurmurHash {
+
private MurmurHash() {
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java Sun Sep 26 14:31:53 2010
@@ -64,7 +64,7 @@ public final class ModelSerializerTest e
public void onlineAucRoundtrip() {
RandomUtils.useTestSeed();
OnlineAuc auc1 = new GlobalOnlineAuc();
- Random gen = new Random(2);
+ Random gen = RandomUtils.getRandom(2L);
for (int i = 0; i < 10000; i++) {
auc1.addSample(0, gen.nextGaussian());
auc1.addSample(1, gen.nextGaussian() + 1);
@@ -162,7 +162,7 @@ public final class ModelSerializerTest e
@Test
public void trainingExampleList() {
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
List<AdaptiveLogisticRegression.TrainingExample> x1 = Lists.newArrayList();
for (int i = 0; i < 10; i++) {
AdaptiveLogisticRegression.TrainingExample t =
@@ -189,7 +189,7 @@ public final class ModelSerializerTest e
private static void train(OnlineLearner olr, int n) {
Vector beta = new DenseVector(new double[]{1, -1, 0, 0.5, -0.5});
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
for (int i = 0; i < n; i++) {
Vector x = randomVector(gen, 5);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/TextValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/TextValueEncoderTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/TextValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/TextValueEncoderTest.java Sun Sep 26 14:31:53 2010
@@ -17,7 +17,6 @@
package org.apache.mahout.vectors;
-import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
@@ -62,7 +61,7 @@ public final class TextValueEncoderTest
@Test
public void testAsString() {
Locale.setDefault(Locale.ENGLISH);
- TextValueEncoder enc = new TextValueEncoder("text");
+ FeatureVectorEncoder enc = new TextValueEncoder("text");
assertEquals("[text:test1:1.0000, text:and:1.0000, text:more:1.0000]", enc.asString("test1 and more"));
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/WordLikeValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/WordLikeValueEncoderTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/WordLikeValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/WordLikeValueEncoderTest.java Sun Sep 26 14:31:53 2010
@@ -17,7 +17,6 @@
package org.apache.mahout.vectors;
-import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.mahout.common.MahoutTestCase;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java Sun Sep 26 14:31:53 2010
@@ -87,18 +87,16 @@ public class XmlInputFormat extends Text
}
private boolean next(LongWritable key, Text value) throws IOException {
- if (fsin.getPos() < end) {
- if (readUntilMatch(startTag, false)) {
- try {
- buffer.write(startTag);
- if (readUntilMatch(endTag, true)) {
- key.set(fsin.getPos());
- value.set(buffer.getData(), 0, buffer.getLength());
- return true;
- }
- } finally {
- buffer.reset();
+ if (fsin.getPos() < end && readUntilMatch(startTag, false)) {
+ try {
+ buffer.write(startTag);
+ if (readUntilMatch(endTag, true)) {
+ key.set(fsin.getPos());
+ value.set(buffer.getData(), 0, buffer.getLength());
+ return true;
}
+ } finally {
+ buffer.reset();
}
}
return false;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/PrintResourceOrFile.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/PrintResourceOrFile.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/PrintResourceOrFile.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/PrintResourceOrFile.java Sun Sep 26 14:31:53 2010
@@ -18,14 +18,17 @@
package org.apache.mahout.classifier.sgd;
import java.io.BufferedReader;
-import java.io.IOException;
/**
* Uses the same logic as TrainLogistic and RunLogistic for finding an input, but instead
* of processing the input, this class just prints the input to standard out.
*/
-public class PrintResourceOrFile {
- public static void main(String[] args) throws IOException {
+public final class PrintResourceOrFile {
+
+ private PrintResourceOrFile() {
+ }
+
+ public static void main(String[] args) throws Exception {
if (args.length != 1) {
throw new IllegalArgumentException("Must have a single argument that names a file or resource.");
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java Sun Sep 26 14:31:53 2010
@@ -34,7 +34,7 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
-public class RunLogistic {
+public final class RunLogistic {
private static String inputFile;
private static String modelFile;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java Sun Sep 26 14:31:53 2010
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.clustering.display;
import org.apache.hadoop.fs.Path;
@@ -6,6 +23,6 @@ import org.apache.hadoop.fs.PathFilter;
public class ClustersFilter implements PathFilter {
@Override
public boolean accept(Path path) {
- return (path.toString().contains("/clusters-"));
+ return path.toString().contains("/clusters-");
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java Sun Sep 26 14:31:53 2010
@@ -22,6 +22,7 @@ import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -109,10 +110,11 @@ final class DisplayMeanShift extends Dis
writeSampleData(samples);
boolean b = true;
if (b) {
- new MeanShiftCanopyDriver().run(new Configuration(), samples, output, measure, t1, t2, 0.005, 20, false, true, true);
+ new MeanShiftCanopyDriver().run(
+ new Configuration(), samples, output, measure, t1, t2, 0.005, 20, false, true, true);
loadClusters(output);
} else {
- List<Vector> points = new ArrayList<Vector>();
+ Collection<Vector> points = new ArrayList<Vector>();
for (VectorWritable sample : SAMPLE_DATA) {
points.add(sample.get());
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Sun Sep 26 14:31:53 2010
@@ -41,8 +41,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class InputDriver {
- /**Logger for this class.*/
- private static final Logger LOG = LoggerFactory.getLogger(InputDriver.class);
+
+ private static final Logger log = LoggerFactory.getLogger(InputDriver.class);
private InputDriver() {
}
@@ -78,12 +78,13 @@ public final class InputDriver {
"org.apache.mahout.math.RandomAccessSparseVector").toString();
runJob(input, output, vectorClassName);
} catch (OptionException e) {
- InputDriver.LOG.error("Exception parsing command line: ", e);
+ InputDriver.log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
- public static void runJob(Path input, Path output, String vectorClassName) throws IOException, InterruptedException, ClassNotFoundException {
+ public static void runJob(Path input, Path output, String vectorClassName)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set("vector.implementation.class.name", vectorClassName);
Job job = new Job(conf);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java Sun Sep 26 14:31:53 2010
@@ -79,6 +79,6 @@ public class InputMapper extends Mapper<
} catch (ClassNotFoundException e) {
throw new IllegalStateException(e);
}
+ }
- }
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Sun Sep 26 14:31:53 2010
@@ -24,7 +24,6 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -36,6 +35,8 @@ import org.slf4j.LoggerFactory;
public final class Job extends AbstractJob {
+ private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
+
private Job() {
}
@@ -74,11 +75,12 @@ public final class Job extends AbstractJ
*/
private static void run(Path input, Path output, DistanceMeasure measure, double t1, double t2) throws IOException,
InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
- Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
+ Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
CanopyDriver.run(new Configuration(), directoryContainingConvertedInput, output, measure, t1, t2, true, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Sun Sep 26 14:31:53 2010
@@ -37,7 +37,6 @@ import org.apache.mahout.clustering.diri
import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution;
import org.apache.mahout.clustering.dirichlet.models.GaussianClusterDistribution;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
@@ -52,6 +51,8 @@ public final class Job extends AbstractJ
private static final Logger log = LoggerFactory.getLogger(Job.class);
+ private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
+
private Job() {
}
@@ -63,30 +64,35 @@ public final class Job extends AbstractJ
log.info("Running with default arguments");
Path output = new Path("output");
HadoopUtil.overwriteOutput(output);
- AbstractVectorModelDistribution modelDistribution = new GaussianClusterDistribution(new VectorWritable(new RandomAccessSparseVector(60)));
+ ModelDistribution<VectorWritable> modelDistribution =
+ new GaussianClusterDistribution(new VectorWritable(new RandomAccessSparseVector(60)));
new Job().run(new Path("testdata"), output, modelDistribution, 10, 5, 1.0, true, 0);
}
}
@Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
- NoSuchMethodException, InvocationTargetException, InterruptedException {
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+ NoSuchMethodException, InvocationTargetException, InterruptedException {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.numClustersOption().withRequired(true).create());
addOption(DefaultOptionCreator.overwriteOption().create());
- addOption(new DefaultOptionBuilder().withLongName(DirichletDriver.ALPHA_OPTION).withRequired(false).withShortName("m")
- .withArgument(new ArgumentBuilder().withName(DirichletDriver.ALPHA_OPTION).withDefault("1.0").withMinimum(1).withMaximum(1)
- .create()).withDescription("The alpha0 value for the DirichletDistribution. Defaults to 1.0").create());
- addOption(new DefaultOptionBuilder().withLongName(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION).withRequired(false)
- .withShortName("md").withArgument(new ArgumentBuilder().withName(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION)
+ addOption(new DefaultOptionBuilder().withLongName(DirichletDriver.ALPHA_OPTION).withRequired(false)
+ .withShortName("m").withArgument(new ArgumentBuilder().withName(DirichletDriver.ALPHA_OPTION).withDefault("1.0")
+ .withMinimum(1).withMaximum(1).create())
+ .withDescription("The alpha0 value for the DirichletDistribution. Defaults to 1.0").create());
+ addOption(new DefaultOptionBuilder().withLongName(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION)
+ .withRequired(false).withShortName("md").withArgument(new ArgumentBuilder()
+ .withName(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION)
.withDefault(NormalModelDistribution.class.getName()).withMinimum(1).withMaximum(1).create())
.withDescription("The ModelDistribution class name. " + "Defaults to NormalModelDistribution").create());
addOption(new DefaultOptionBuilder().withLongName(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION).withRequired(false)
.withShortName("mp").withArgument(new ArgumentBuilder().withName("prototypeClass")
.withDefault(RandomAccessSparseVector.class.getName()).withMinimum(1).withMaximum(1).create())
- .withDescription("The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector").create());
+ .withDescription("The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector")
+ .create());
addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create());
addOption(DefaultOptionCreator.emitMostLikelyOption().create());
addOption(DefaultOptionCreator.thresholdOption().create());
@@ -141,9 +147,10 @@ public final class Job extends AbstractJ
int maxIterations,
double alpha0,
boolean emitMostLikely,
- double threshold) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
- NoSuchMethodException, InvocationTargetException, SecurityException, InterruptedException {
- Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
+ double threshold)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+ SecurityException, InterruptedException {
+ Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
DirichletDriver.run(directoryContainingConvertedInput,
output,
@@ -156,8 +163,8 @@ public final class Job extends AbstractJ
threshold,
false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
@@ -203,7 +210,7 @@ public final class Job extends AbstractJ
*/
private static void printClusters(Iterable<List<DirichletCluster>> clusters, int significant) {
int row = 0;
- StringBuilder result = new StringBuilder();
+ StringBuilder result = new StringBuilder(100);
for (List<DirichletCluster> r : clusters) {
result.append("sample=").append(row++).append("]= ");
for (int k = 0; k < r.size(); k++) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Sun Sep 26 14:31:53 2010
@@ -29,7 +29,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
-import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
@@ -43,9 +42,11 @@ import org.slf4j.LoggerFactory;
public final class Job extends AbstractJob {
- private static final String M_OPTION = FuzzyKMeansDriver.M_OPTION;
private static final Logger log = LoggerFactory.getLogger(Job.class);
+ private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
+ private static final String M_OPTION = FuzzyKMeansDriver.M_OPTION;
+
private Job() {
}
@@ -57,12 +58,17 @@ public final class Job extends AbstractJ
log.info("Running with default arguments");
Path output = new Path("output");
HadoopUtil.overwriteOutput(output);
- new Job().run(new Configuration(), new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, (float) 2, 0.5);
+ new Job().run(new Configuration(),
+ new Path("testdata"),
+ output,
+ new EuclideanDistanceMeasure(),
+ 80, 55, 10, (float) 2, 0.5);
}
}
@Override
- public int run(String[] args) throws Exception {
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.distanceMeasureOption().create());
@@ -105,14 +111,8 @@ public final class Job extends AbstractJ
/**
* Return the path to the final iteration's clusters
- *
- * @param conf
- * @param output
- * @param maxIterations
- * @return
- * @throws IOException
*/
- private Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
+ private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
FileSystem fs = FileSystem.get(conf);
for (int i = maxIterations; i >= 0; i--) {
Path clusters = new Path(output, "clusters-" + i);
@@ -130,7 +130,6 @@ public final class Job extends AbstractJ
* expects a file containing synthetic_control.data as obtained from
* http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series resides in a directory named
* "testdata", and writes output to a directory named "output".
- * @param conf TODO
* @param input
* the String denoting the input directory path
* @param output
@@ -154,9 +153,9 @@ public final class Job extends AbstractJ
double t2,
int maxIterations,
float fuzziness,
- double convergenceDelta) throws IOException, InstantiationException, IllegalAccessException,
- InterruptedException, ClassNotFoundException {
- Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
+ double convergenceDelta)
+ throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
+ Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
log.info("Running Canopy to get initial clusters");
@@ -174,8 +173,8 @@ public final class Job extends AbstractJ
0.0,
false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper =
+ new ClusterDumper(finalClusterPath(conf, output, maxIterations), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Sun Sep 26 14:31:53 2010
@@ -27,7 +27,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
-import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
@@ -43,6 +42,8 @@ public final class Job extends AbstractJ
private static final Logger log = LoggerFactory.getLogger(Job.class);
+ private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
+
private Job() {
}
@@ -59,7 +60,8 @@ public final class Job extends AbstractJ
}
@Override
- public int run(String[] args) throws Exception {
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.distanceMeasureOption().create());
@@ -116,11 +118,6 @@ public final class Job extends AbstractJ
* the double convergence criteria for iterations
* @param maxIterations
* the int maximum number of iterations
- *
- * @throws IllegalAccessException
- * @throws InstantiationException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public void run(Configuration conf,
Path input,
@@ -129,9 +126,9 @@ public final class Job extends AbstractJ
double t1,
double t2,
double convergenceDelta,
- int maxIterations) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
- ClassNotFoundException {
- Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
+ int maxIterations)
+ throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
+ Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
log.info("Running Canopy to get initial clusters");
@@ -147,21 +144,15 @@ public final class Job extends AbstractJ
true,
false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper =
+ new ClusterDumper(finalClusterPath(conf, output, maxIterations), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
/**
* Return the path to the final iteration's clusters
- *
- * @param conf
- * @param output
- * @param maxIterations
- * @return
- * @throws IOException
*/
- private Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
+ private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
FileSystem fs = FileSystem.get(conf);
for (int i = maxIterations; i >= 0; i--) {
Path clusters = new Path(output, "clusters-" + i);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Sun Sep 26 14:31:53 2010
@@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
public final class InputDriver {
- private static final Logger LOG = LoggerFactory.getLogger(InputDriver.class);
+ private static final Logger log = LoggerFactory.getLogger(InputDriver.class);
private InputDriver() {
}
@@ -67,7 +67,7 @@ public final class InputDriver {
Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
runJob(input, output);
} catch (OptionException e) {
- InputDriver.LOG.error("Exception parsing command line: ", e);
+ InputDriver.log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Sun Sep 26 14:31:53 2010
@@ -26,7 +26,6 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
-import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -40,6 +39,8 @@ public final class Job extends AbstractJ
private static final Logger log = LoggerFactory.getLogger(Job.class);
+ private static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "data";
+
private Job() {
}
@@ -56,7 +57,8 @@ public final class Job extends AbstractJ
}
@Override
- public int run(String[] args) throws Exception {
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InterruptedException, InstantiationException, IllegalAccessException {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.convergenceOption().create());
@@ -87,7 +89,7 @@ public final class Job extends AbstractJ
double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
ClassLoader ccl = Thread.currentThread().getContextClassLoader();
- DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(measureClass)).newInstance();
+ DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations);
return 0;
@@ -100,7 +102,6 @@ public final class Job extends AbstractJ
* the job expects a file containing synthetic_control.data as obtained from
* http://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series resides in a directory named
* "testdata", and writes output to a directory named "output".
- * @param conf TODO
* @param input
* the String denoting the input directory path
* @param output
@@ -123,9 +124,9 @@ public final class Job extends AbstractJ
double t1,
double t2,
double convergenceDelta,
- int maxIterations) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
- IllegalAccessException {
- Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
+ int maxIterations)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+ Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput);
new MeanShiftCanopyDriver().run(conf,
directoryContainingConvertedInput,
@@ -138,8 +139,8 @@ public final class Job extends AbstractJ
true,
true, false);
// run ClusterDumper
- ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
- "clusteredPoints"));
+ ClusterDumper clusterDumper =
+ new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(null);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Sun Sep 26 14:31:53 2010
@@ -166,8 +166,7 @@ public final class CDGA {
double mutrange,
int mutprec,
int popSize,
- int genCount)
- throws IOException, InterruptedException, ClassNotFoundException {
+ int genCount) throws IOException, InterruptedException, ClassNotFoundException {
Path inpath = new Path(dataset);
CDMahoutEvaluator.initializeDataSet(inpath);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducer.java Sun Sep 26 14:31:53 2010
@@ -29,8 +29,9 @@ import org.apache.mahout.ga.watchmaker.c
public class CDReducer extends Reducer<LongWritable, CDFitness, LongWritable, CDFitness> {
@Override
- protected void reduce(LongWritable key, Iterable<CDFitness> values, Context context)
- throws IOException, InterruptedException {
+ protected void reduce(LongWritable key,
+ Iterable<CDFitness> values,
+ Context context) throws IOException, InterruptedException {
int tp = 0;
int fp = 0;
int tn = 0;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java Sun Sep 26 14:31:53 2010
@@ -18,9 +18,9 @@
package org.apache.mahout.ga.watchmaker.cd.tool;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.Set;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -38,13 +38,14 @@ import org.apache.mahout.common.StringUt
*/
public class ToolCombiner extends Reducer<LongWritable, Text, LongWritable, Text> {
- private final Set<String> distinct = new HashSet<String>();
+ private final Collection<String> distinct = new HashSet<String>();
private Descriptors descriptors;
@Override
- protected void reduce(LongWritable key, Iterable<Text> values, Context context)
- throws IOException, InterruptedException {
+ protected void reduce(LongWritable key,
+ Iterable<Text> values,
+ Context context) throws IOException, InterruptedException {
context.write(key, new Text(createDescription((int) key.get(), values.iterator())));
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java Sun Sep 26 14:31:53 2010
@@ -43,7 +43,9 @@ public class ToolReducer extends Reducer
private final Collection<String> distinct = new HashSet<String>();
@Override
- protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(LongWritable key,
+ Iterable<Text> values,
+ Context context) throws IOException, InterruptedException {
context.write(key, new Text(combineDescriptions((int) key.get(), values.iterator())));
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java Sun Sep 26 14:31:53 2010
@@ -146,11 +146,12 @@ public final class WikipediaToSequenceFi
* category string
* @param all
* if true select all categories
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
- public static void runJob(String input, String output, String catFile,
- boolean exactMatchOnly, boolean all) throws IOException, InterruptedException, ClassNotFoundException {
+ public static void runJob(String input,
+ String output,
+ String catFile,
+ boolean exactMatchOnly,
+ boolean all) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set("xmlinput.start", "<page>");
conf.set("xmlinput.end", "</page>");
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java Sun Sep 26 14:31:53 2010
@@ -23,6 +23,7 @@ import java.util.Random;
import java.util.ArrayList;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.AbstractMatrix;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
@@ -50,7 +51,7 @@ public class HebbianSolver {
private final SingularVectorVerifier verifier;
private final double convergenceTarget;
private final int maxPassesPerEigen;
- private final Random rng = new Random();
+ private final Random rng = RandomUtils.getRandom();
private int numPasses = 0;
private static final boolean debug = false;
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java Sun Sep 26 14:31:53 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.function.VectorFunction;
import org.junit.Before;
@@ -250,7 +251,7 @@ public abstract class MatrixTest extends
assertEquals(c[COL], test.viewRow(3).size());
assertEquals(c[COL], test.viewRow(5).size());
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
for (int row = 0; row < c[ROW]; row++) {
int j = gen.nextInt(c[COL]);
double old = test.get(row, j);
@@ -274,7 +275,7 @@ public abstract class MatrixTest extends
assertEquals(c[ROW], test.viewColumn(3).size());
assertEquals(c[ROW], test.viewColumn(5).size());
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
for (int col = 0; col < c[COL]; col++) {
int j = gen.nextInt(c[COL]);
double old = test.get(col, j);
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Sun Sep 26 14:31:53 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math;
+import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
import java.util.Random;
@@ -43,7 +44,7 @@ public final class TestSingularValueDeco
double[] singularValues = { 123.456, 2.3, 1.001, 0.999 };
int rows = singularValues.length + 2;
int columns = singularValues.length;
- Random r = new Random(15338437322523L);
+ Random r = RandomUtils.getRandom(15338437322523L);
SingularValueDecomposition svd =
new SingularValueDecomposition(createTestMatrix(r, rows, columns, singularValues));
double[] computedSV = svd.getSingularValues();
@@ -58,7 +59,7 @@ public final class TestSingularValueDeco
double[] singularValues = { 123.456, 2.3, 1.001, 0.999 };
int rows = singularValues.length;
int columns = singularValues.length + 2;
- Random r = new Random(732763225836210L);
+ Random r = RandomUtils.getRandom(732763225836210L);
SingularValueDecomposition svd =
new SingularValueDecomposition(createTestMatrix(r, rows, columns, singularValues));
double[] computedSV = svd.getSingularValues();
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java Sun Sep 26 14:31:53 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math.decomposer;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SequentialAccessSparseVector;
@@ -82,8 +83,8 @@ public abstract class SolverTest extends
int entriesPerRow,
double entryMean) {
SparseRowMatrix m = new SparseRowMatrix(new int[]{numRows, numCols});
- double n = 0;
- Random r = new Random(1234L);
+ //double n = 0;
+ Random r = RandomUtils.getRandom(1234L);
for (int i = 0; i < nonNullRows; i++) {
SequentialAccessSparseVector v = new SequentialAccessSparseVector(numCols);
for (int j = 0; j < entriesPerRow; j++) {
@@ -100,7 +101,7 @@ public abstract class SolverTest extends
m.assignRow(c, other.clone());
}
}
- n += m.getRow(c).getLengthSquared();
+ //n += m.getRow(c).getLengthSquared();
}
return m;
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java Sun Sep 26 14:31:53 2010
@@ -33,7 +33,7 @@ public final class NormalTest extends Ma
@Test
public void testCdf() {
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
double offset = 0;
double scale = 1;
for (int k = 0; k < 20; k++) {
@@ -46,7 +46,7 @@ public final class NormalTest extends Ma
@Test
public void consistency() throws ConvergenceException, FunctionEvaluationException {
- Random gen = new Random(1);
+ Random gen = RandomUtils.getRandom(1L);
double offset = 0;
double scale = 1;
Normal dist = new Normal(offset, scale, RandomUtils.getRandom());
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/stat/GammaTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/stat/GammaTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/stat/GammaTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/stat/GammaTest.java Sun Sep 26 14:31:53 2010
@@ -23,6 +23,7 @@ import com.google.common.collect.Iterabl
import com.google.common.io.CharStreams;
import com.google.common.io.InputSupplier;
import com.google.common.io.Resources;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.junit.Test;
@@ -95,10 +96,10 @@ public final class GammaTest extends Mah
@Test
public void beta() {
- Random x = new Random(1);
+ Random r = RandomUtils.getRandom(1L);
for (int i = 0; i < 200; i++) {
- double alpha = -50 * Math.log(1 - x.nextDouble());
- double beta = -50 * Math.log(1 - x.nextDouble());
+ double alpha = -50 * Math.log(1 - r.nextDouble());
+ double beta = -50 * Math.log(1 - r.nextDouble());
double ref = Math.exp(Gamma.logGamma(alpha) + Gamma.logGamma(beta) - Gamma.logGamma(alpha + beta));
double actual = Gamma.beta(alpha, beta);
double err = (ref - actual) / ref;
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java Sun Sep 26 14:31:53 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math.stats;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.junit.Test;
@@ -45,7 +46,7 @@ public final class OnlineSummarizerTest
// 95% confidence limits for those values.
// symmetrical, well behaved
- check(normal(10000, 1),
+ check(normal(10000),
-4.417246, -3.419809,
-0.6972919, -0.6519899,
-0.02056658, 0.02176474,
@@ -55,7 +56,7 @@ public final class OnlineSummarizerTest
0.988395, 1.011883);
// asymmetrical, well behaved. The range for the maximum was fudged slightly to allow this to pass.
- check(exp(10000, 1),
+ check(exp(10000),
4.317969e-06, 3.278763e-04,
0.2783866, 0.298,
0.6765024, 0.7109463,
@@ -93,35 +94,26 @@ public final class OnlineSummarizerTest
}
}
- private static OnlineSummarizer normal(int n, int seed) {
+ private static OnlineSummarizer normal(int n) {
OnlineSummarizer x = new OnlineSummarizer();
- Random gen = new Random(seed);
+ // TODO use RandomUtils.getRandom() and rejigger constants to make test pass
+ Random gen = new Random(1L);
for (int i = 0; i < n; i++) {
x.add(gen.nextGaussian());
}
return x;
}
- private static OnlineSummarizer exp(int n, int seed) {
+ private static OnlineSummarizer exp(int n) {
OnlineSummarizer x = new OnlineSummarizer();
- Random gen = new Random(seed);
+ // TODO use RandomUtils.getRandom() and rejigger constants to make test pass
+ Random gen = new Random(1L);
for (int i = 0; i < n; i++) {
x.add(-Math.log(1 - gen.nextDouble()));
}
return x;
}
- /*
- private static OnlineSummarizer gamma(int n, int seed) {
- OnlineSummarizer x = new OnlineSummarizer();
- Gamma g = new Gamma(0.01, 100, new MersenneTwister(seed));
- for (int i = 0; i < n; i++) {
- x.add(g.nextDouble());
- }
- return x;
- }
- */
-
}
Modified: mahout/trunk/taste-web/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/taste-web/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/taste-web/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java (original)
+++ mahout/trunk/taste-web/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java Sun Sep 26 14:31:53 2010
@@ -170,7 +170,7 @@ public final class RecommenderServlet ex
}
private void writeDebugRecommendations(long userID, Iterable<RecommendedItem> items, PrintWriter writer)
- throws TasteException {
+ throws TasteException {
DataModel dataModel = recommender.getDataModel();
writer.print("User:");
writer.println(userID);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Sun Sep 26 14:31:53 2010
@@ -51,7 +51,7 @@ public class CDbwEvaluator {
private final DistanceMeasure measure;
- private boolean pruned = false;
+ private boolean pruned;
/**
* For testing only
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsReducer.java Sun Sep 26 14:31:53 2010
@@ -27,7 +27,8 @@ import org.apache.hadoop.mapreduce.Reduc
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.math.VectorWritable;
-public class RepresentativePointsReducer extends Reducer<IntWritable, WeightedVectorWritable, IntWritable, VectorWritable> {
+public class RepresentativePointsReducer
+ extends Reducer<IntWritable, WeightedVectorWritable, IntWritable, VectorWritable> {
private Map<Integer, List<VectorWritable>> representativePoints;
@@ -43,8 +44,8 @@ public class RepresentativePointsReducer
}
@Override
- protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context) throws IOException,
- InterruptedException {
+ protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
+ throws IOException, InterruptedException {
// find the most distant point
WeightedVectorWritable mdp = null;
for (WeightedVectorWritable dpw : values) {
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sun Sep 26 14:31:53 2010
@@ -148,9 +148,9 @@ public final class ClusterDumper extends
Configuration conf = new Configuration();
if (this.termDictionary != null) {
- if (dictionaryFormat.equals("text")) {
+ if ("text".equals(dictionaryFormat)) {
dictionary = VectorHelper.loadTermDictionary(new File(this.termDictionary));
- } else if (dictionaryFormat.equals("sequencefile")) {
+ } else if ("sequencefile".equals(dictionaryFormat)) {
FileSystem fs = FileSystem.get(new Path(this.termDictionary).toUri(), conf);
dictionary = VectorHelper.loadTermDictionary(conf, fs, this.termDictionary);
} else {
@@ -340,7 +340,7 @@ public final class ClusterDumper extends
topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
}
- StringBuilder sb = new StringBuilder();
+ StringBuilder sb = new StringBuilder(100);
for (Pair<String, Double> item : topTerms) {
String term = item.getFirst();
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitioner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitioner.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitioner.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyPartitioner.java Sun Sep 26 14:31:53 2010
@@ -37,7 +37,8 @@ public class GramKeyPartitioner extends
@Override
public int getPartition(GramKey key, Gram value, int numPartitions) {
- // see: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/partition/BinaryPartitioner.java?revision=816664&view=markup
+ // see: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/
+ // lib/partition/BinaryPartitioner.java?revision=816664&view=markup
int length = key.getLength() - 1;
int right = (offset + length) % length;
int hash = WritableComparator.hashBytes(key.getBytes(), right);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Sun Sep 26 14:31:53 2010
@@ -85,25 +85,22 @@ public class MapBackedARFFModel implemen
data = data.trim();
double result = 0.0;
switch (type) {
- case NUMERIC: {
+ case NUMERIC:
result = processNumeric(data);
break;
- }
- case DATE: {
+ case DATE:
result = processDate(data, idx);
break;
- }
- case STRING: {
+ case STRING:
// may have quotes
result = processString(data);
break;
- }
- case NOMINAL: {
+ case NOMINAL:
String label = idxLabel.get(idx);
result = processNominal(label, data);
break;
- }
-
+ default:
+ throw new IllegalStateException("Unknown type: " + type);
}
return result;
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java Sun Sep 26 14:31:53 2010
@@ -81,7 +81,8 @@ public final class PartialVectorMerger {
int dimension,
boolean sequentialAccess,
boolean namedVector,
- int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
+ int numReducers)
+ throws IOException, InterruptedException, ClassNotFoundException {
if (normPower != NO_NORMALIZING && normPower < 0) {
throw new IllegalArgumentException("normPower must either be -1 or >= 0");
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java Sun Sep 26 14:31:53 2010
@@ -250,9 +250,6 @@ public final class DictionaryVectorizer
* output vectors should be named, retaining key (doc id) as a label
* @param numReducers
* the desired number of reducer tasks
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
private static void makePartialVectors(Path input,
int maxNGramSize,
@@ -261,7 +258,8 @@ public final class DictionaryVectorizer
int dimension,
boolean sequentialAccess,
boolean namedVectors,
- int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
+ int numReducers)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
// this conf parameter needs to be set enable serialisation of conf values
@@ -300,10 +298,9 @@ public final class DictionaryVectorizer
/**
* Count the frequencies of words in parallel using Map/Reduce. The input documents have to be in
* {@link SequenceFile} format
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
- private static void startWordCounting(Path input, Path output, int minSupport) throws IOException, InterruptedException, ClassNotFoundException {
+ private static void startWordCounting(Path input, Path output, int minSupport)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
// this conf parameter needs to be set enable serialisation of conf values
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java Sun Sep 26 14:31:53 2010
@@ -59,7 +59,8 @@ public class TFPartialVectorReducer exte
private int maxNGramSize = 1;
@Override
- protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(Text key, Iterable<StringTuple> values, Context context)
+ throws IOException, InterruptedException {
Iterator<StringTuple> it = values.iterator();
if (!it.hasNext()) {
return;
@@ -73,11 +74,9 @@ public class TFPartialVectorReducer exte
do {
String term = (sf.getAttribute(TermAttribute.class)).term();
- if (term.length() > 0) { // ngram
- if (dictionary.containsKey(term)) {
- int termId = dictionary.get(term);
- vector.setQuick(termId, vector.getQuick(termId) + 1);
- }
+ if (term.length() > 0 && dictionary.containsKey(term)) { // ngram
+ int termId = dictionary.get(term);
+ vector.setQuick(termId, vector.getQuick(termId) + 1);
}
} while (sf.incrementToken());
@@ -85,11 +84,9 @@ public class TFPartialVectorReducer exte
sf.close();
} else {
for (String term : value.getEntries()) {
- if (term.length() > 0) { // unigram
- if (dictionary.containsKey(term)) {
- int termId = dictionary.get(term);
- vector.setQuick(termId, vector.getQuick(termId) + 1);
- }
+ if (term.length() > 0 && dictionary.containsKey(term)) { // unigram
+ int termId = dictionary.get(term);
+ vector.setQuick(termId, vector.getQuick(termId) + 1);
}
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountMapper.java Sun Sep 26 14:31:53 2010
@@ -50,7 +50,7 @@ public class TermCountMapper extends Map
context.getCounter("Exception", "Output IO Exception").increment(1);
} catch (InterruptedException e) {
context.getCounter("Exception", "Interrupted Exception").increment(1);
- }
+ }
return true;
}
});
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java Sun Sep 26 14:31:53 2010
@@ -29,7 +29,6 @@ import org.apache.mahout.math.VectorWrit
/**
* TextVectorizer Document Frequency Count Mapper. Outputs 1 for each feature
- *
*/
public class TermDocumentCountMapper extends Mapper<WritableComparable<?>, VectorWritable, IntWritable, LongWritable> {
@@ -39,7 +38,7 @@ public class TermDocumentCountMapper ext
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector vector = value.get();
Iterator<Vector.Element> it = vector.iterateNonZero();
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountReducer.java Sun Sep 26 14:31:53 2010
@@ -29,7 +29,8 @@ import org.apache.hadoop.mapreduce.Reduc
public class TermDocumentCountReducer extends Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
@Override
- protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java?rev=1001437&r1=1001436&r2=1001437&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java Sun Sep 26 14:31:53 2010
@@ -63,7 +63,7 @@ public class TFIDFPartialVectorReducer e
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Iterator<VectorWritable> it = values.iterator();
if (!it.hasNext()) {
return;