You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/10/09 18:48:10 UTC
svn commit: r823611 - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/impl/common/
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/
core/src/main/java/org/apache/mahout/classifier/bayes/datastore/
core/src/main/jav...
Author: srowen
Date: Fri Oct 9 16:48:09 2009
New Revision: 823611
URL: http://svn.apache.org/viewvc?rev=823611&view=rev
Log:
Code tweaks per style, practice discussions
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdNeighborhoodTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarityTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestOrderedIntDoubleMapping.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorView.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/TermInfoWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java Fri Oct 9 16:48:09 2009
@@ -67,31 +67,20 @@
}
private void doNext() {
+ int toSkip = 0;
+ while (r.nextDouble() >= samplingRate) {
+ toSkip++;
+ }
+ // Really, would be nicer to select value from geometric distribution, for small values of samplingRate
+ if (toSkip > 0) {
+ delegate.skip(toSkip);
+ }
boolean found = false;
- if (delegate instanceof SkippingIterator) {
- SkippingIterator<?> skippingDelegate = (SkippingIterator<?>) delegate;
- int toSkip = 0;
- while (r.nextDouble() >= samplingRate) {
- toSkip++;
- }
- // Really, would be nicer to select value from geometric distribution, for small values of samplingRate
- if (toSkip > 0) {
- skippingDelegate.skip(toSkip);
- }
- if (skippingDelegate.hasNext()) {
- next = delegate.next();
- found = true;
- }
- } else {
- while (delegate.hasNext()) {
- long delegateNext = delegate.next();
- if (r.nextDouble() < samplingRate) {
- next = delegateNext;
- found = true;
- break;
- }
- }
+ if (delegate.hasNext()) {
+ next = delegate.next();
+ found = true;
}
+
if (!found) {
hasNext = false;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java Fri Oct 9 16:48:09 2009
@@ -23,7 +23,6 @@
import org.apache.mahout.common.LongPair;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Fri Oct 9 16:48:09 2009
@@ -48,12 +48,11 @@
protected Cache<String, Result> tableCache = null;
- protected String hbaseTable;
+ protected final String hbaseTable;
protected Parameters parameters = null;
- public HBaseBayesDatastore(String hbaseTable, Parameters params)
- throws IOException {
+ public HBaseBayesDatastore(String hbaseTable, Parameters params) {
this.hbaseTable = hbaseTable;
this.parameters = params;
this.tableCache = new HybridCache<String, Result>(50000, 100000);
@@ -81,7 +80,7 @@
}
}
- Map<String, Set<String>> keys = new HashMap<String, Set<String>>();
+ final Map<String, Set<String>> keys = new HashMap<String, Set<String>>();
@Override
public Collection<String> getKeys(String name)
@@ -180,7 +179,7 @@
} else
return r;
- } catch (Exception e) {
+ } catch (IOException e) {
return r;
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Fri Oct 9 16:48:09 2009
@@ -32,13 +32,12 @@
public class InMemoryBayesDatastore implements Datastore {
- Map<String, Map<String, Map<String, Double>>> matrices = new HashMap<String, Map<String, Map<String, Double>>>();
-
- Map<String, Map<String, Double>> vectors = new HashMap<String, Map<String, Double>>();
-
+ final Map<String, Map<String, Map<String, Double>>> matrices = new HashMap<String, Map<String, Map<String, Double>>>();
+ final Map<String, Map<String, Double>> vectors = new HashMap<String, Map<String, Double>>();
Parameters params = null;
+ protected double thetaNormalizer = 1.0d;
- public InMemoryBayesDatastore(Parameters params) throws IOException {
+ public InMemoryBayesDatastore(Parameters params) {
matrices.put("weight", new HashMap<String, Map<String, Double>>());
vectors.put("sumWeight", new HashMap<String, Double>());
@@ -55,7 +54,7 @@
params.set("weight", basePath + "/trainer-tfIdf/trainer-tfIdf/part-*");
}
- protected double thetaNormalizer = 1.0d;
+
@Override
public void initialize() throws InvalidDatastoreException {
Configuration conf = new Configuration();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java Fri Oct 9 16:48:09 2009
@@ -29,7 +29,7 @@
* @param datastore
* @throws InvalidDatastoreException
*/
- public void initialize(Datastore datastore) throws InvalidDatastoreException;
+ void initialize(Datastore datastore) throws InvalidDatastoreException;
/**
* Classify the document and return the Result
@@ -42,7 +42,7 @@
* {@link org.apache.mahout.classifier.ClassifierResult}s.
* @throws InvalidDatastoreException
*/
- public ClassifierResult classifyDocument(String[] document,
+ ClassifierResult classifyDocument(String[] document,
Datastore datastore, String defaultCategory)
throws InvalidDatastoreException;
@@ -60,7 +60,7 @@
* {@link org.apache.mahout.classifier.ClassifierResult}s.
* @throws InvalidDatastoreException
*/
- public ClassifierResult[] classifyDocument(String[] document,
+ ClassifierResult[] classifyDocument(String[] document,
Datastore datastore, String defaultCategory, int numResults)
throws InvalidDatastoreException;
@@ -75,7 +75,7 @@
* @return The weighted probability
* @throws InvalidDatastoreException
*/
- public double featureWeight(Datastore datastore, String label, String feature)
+ double featureWeight(Datastore datastore, String label, String feature)
throws InvalidDatastoreException;
/**
@@ -91,7 +91,7 @@
* @throws InvalidDatastoreException
* @see Algorithm#featureWeight(Datastore, String, String)
*/
- public double documentWeight(Datastore datastore, String label,
+ double documentWeight(Datastore datastore, String label,
String[] document) throws InvalidDatastoreException;
/**
@@ -103,5 +103,5 @@
* @throws InvalidDatastoreException
* @return {@link Collection} of labels
*/
- public Collection<String> getLabels(Datastore datastore) throws InvalidDatastoreException;
+ Collection<String> getLabels(Datastore datastore) throws InvalidDatastoreException;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java Fri Oct 9 16:48:09 2009
@@ -32,7 +32,7 @@
* @return double value
* @throws InvalidDatastoreException
*/
- public double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException;
+ double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException;
/**
* Gets a double value from the Vector pointed to by the
@@ -43,7 +43,7 @@
* @return double value
* @throws InvalidDatastoreException
*/
- public double getWeight(String vectorName, String index) throws InvalidDatastoreException;
+ double getWeight(String vectorName, String index) throws InvalidDatastoreException;
/**
* get the keySet of a given Matrix/Vector as given by <code>name</code>
@@ -51,11 +51,11 @@
* @return Collection of keys of Matrix/Vector
* @throws InvalidDatastoreException
*/
- public Collection<String> getKeys(String name) throws InvalidDatastoreException;
+ Collection<String> getKeys(String name) throws InvalidDatastoreException;
/**
* Initializes the {@link Datastore} and loads the model into memory/cache if necessary
* @throws InvalidDatastoreException
*/
- public void initialize() throws InvalidDatastoreException;
+ void initialize() throws InvalidDatastoreException;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java Fri Oct 9 16:48:09 2009
@@ -47,6 +47,9 @@
private static final Logger log = LoggerFactory.getLogger(BayesClassifierDriver.class);
private static final Pattern CHUNK_DEIMITER_PATTERN = Pattern.compile("____");
+ private BayesClassifierDriver() {
+ }
+
/**
* Run the job
*
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java Fri Oct 9 16:48:09 2009
@@ -57,6 +57,7 @@
* @throws ClassNotFoundException
* @throws InterruptedException
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
JobConf conf = new JobConf(BayesDriver.class);
Path outPath = new Path(output);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java Fri Oct 9 16:48:09 2009
@@ -33,8 +33,6 @@
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
import org.apache.mahout.classifier.bayes.mapreduce.common.JobExecutor;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,6 +62,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(BayesThetaNormalizerDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java Fri Oct 9 16:48:09 2009
@@ -54,11 +54,11 @@
throws IOException {
String labelFeaturePair = key.toString();
- double alpha_i = 1.0;
-
+
int comma = labelFeaturePair.indexOf(',');
String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
- reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);
+ reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);
+ double alpha_i = 1.0;
double weight = Math.log((value.get() + alpha_i) / (labelWeightSum.get(label) + vocabCount));
output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java Fri Oct 9 16:48:09 2009
@@ -57,6 +57,7 @@
* @throws ClassNotFoundException
* @throws InterruptedException
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
JobConf conf = new JobConf(CBayesDriver.class);
Path outPath = new Path(output);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesNormalizedWeightDriver.java Fri Oct 9 16:48:09 2009
@@ -62,6 +62,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(CBayesNormalizedWeightDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaDriver.java Fri Oct 9 16:48:09 2009
@@ -26,7 +26,6 @@
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
@@ -66,6 +65,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(CBayesThetaDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java Fri Oct 9 16:48:09 2009
@@ -62,6 +62,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java Fri Oct 9 16:48:09 2009
@@ -44,7 +44,7 @@
private HTable table;
- private HBaseConfiguration HBconf;
+ private ThreadLocal<HBaseConfiguration> HBconf;
boolean useHbase = false;
@@ -85,8 +85,8 @@
else
return;
- HBconf = new HBaseConfiguration(job);
- table = new HTable(HBconf, job.get("output.table"));
+ HBconf.set(new HBaseConfiguration(job));
+ table = new HTable(HBconf.get(), job.get("output.table"));
} catch (IOException e) {
log.error("Unexpected error during configuration", e);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java Fri Oct 9 16:48:09 2009
@@ -56,6 +56,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(BayesFeatureDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java Fri Oct 9 16:48:09 2009
@@ -31,7 +31,6 @@
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.classifier.bayes.common.BayesParameters;
@@ -71,6 +70,7 @@
* @param output the output pathname String
* @throws ClassNotFoundException
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java Fri Oct 9 16:48:09 2009
@@ -48,8 +48,6 @@
private HTable table;
- private HBaseConfiguration HBconf;
-
boolean useHbase = false;
@Override
@@ -106,8 +104,8 @@
"bayes.parameters", ""));
if(params.get("dataSource").equals("hbase"))useHbase = true;
else return;
-
- HBconf = new HBaseConfiguration(job);
+
+ HBaseConfiguration HBconf = new HBaseConfiguration(job);
table = new HTable(HBconf, job.get("output.table"));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java Fri Oct 9 16:48:09 2009
@@ -51,6 +51,7 @@
* @param input the input pathname String
* @param output the output pathname String
*/
+ @Override
public void runJob(String input, String output, BayesParameters params) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(BayesWeightSummerDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java Fri Oct 9 16:48:09 2009
@@ -44,8 +44,6 @@
private HTable table;
- private HBaseConfiguration HBconf;
-
boolean useHbase = false;
@Override
@@ -100,7 +98,7 @@
else
return;
- HBconf = new HBaseConfiguration(job);
+ HBaseConfiguration HBconf = new HBaseConfiguration(job);
table = new HTable(HBconf, job.get("output.table"));
} catch (IOException e) {
log.error("Unexpected error during configuration", e);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Fri Oct 9 16:48:09 2009
@@ -42,7 +42,7 @@
* @param job the job to execute.
* @throws Exception any exception thrown at job execution.
* */
- public void execute(final String args[], final BayesJob job) throws Exception {
+ public static void execute(final String[] args, final BayesJob job) throws Exception {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java Fri Oct 9 16:48:09 2009
@@ -29,8 +29,7 @@
protected Algorithm algorithm = null;
protected Datastore datastore = null;
- public ClassifierContext(Algorithm algorithm, Datastore datastore) throws InvalidDatastoreException
- {
+ public ClassifierContext(Algorithm algorithm, Datastore datastore) {
this.algorithm = algorithm;
this.datastore = datastore;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Fri Oct 9 16:48:09 2009
@@ -103,7 +103,7 @@
int numReduceTasks = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
boolean doCanopy = Boolean.parseBoolean(cmdLine.getValue(doCanopyOpt).toString());
float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());
- String vectorClassName = cmdLine.getValue(vectorClassOpt).toString();;
+ String vectorClassName = cmdLine.getValue(vectorClassOpt).toString();
Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
runJob(input, clusters, output, measureClass, convergenceDelta,
maxIterations, numMapTasks, numReduceTasks, doCanopy, m, vectorClass);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Fri Oct 9 16:48:09 2009
@@ -89,20 +89,19 @@
final Cluster cluster;
if (beginIndex <= 0) {
throw new IllegalArgumentException(ERROR_UNKNOWN_CLUSTER_FORMAT + formattedString);
+ }
+ final String id = formattedString.substring(0, beginIndex);
+ final String center = formattedString.substring(beginIndex);
+ final char firstChar = id.charAt(0);
+ final boolean startsWithV = firstChar == 'V';
+ if (firstChar == 'C' || startsWithV) {
+ final int clusterId = Integer.parseInt(formattedString.substring(1,
+ beginIndex - 2));
+ final Vector clusterCenter = AbstractVector.decodeVector(center);
+ cluster = new Cluster(clusterCenter, clusterId);
+ cluster.converged = startsWithV;
} else {
- final String id = formattedString.substring(0, beginIndex);
- final String center = formattedString.substring(beginIndex);
- final char firstChar = id.charAt(0);
- final boolean startsWithV = firstChar == 'V';
- if (firstChar == 'C' || startsWithV) {
- final int clusterId = Integer.parseInt(formattedString.substring(1,
- beginIndex - 2));
- final Vector clusterCenter = AbstractVector.decodeVector(center);
- cluster = new Cluster(clusterCenter, clusterId);
- cluster.converged = startsWithV;
- } else {
- throw new IllegalArgumentException(ERROR_UNKNOWN_CLUSTER_FORMAT + formattedString);
- }
+ throw new IllegalArgumentException(ERROR_UNKNOWN_CLUSTER_FORMAT + formattedString);
}
return cluster;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java Fri Oct 9 16:48:09 2009
@@ -18,7 +18,6 @@
package org.apache.mahout.common;
import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
-import org.apache.mahout.common.IOUtils;
import java.io.BufferedReader;
import java.io.Closeable;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Fri Oct 9 16:48:09 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.common.commandline;
import org.apache.commons.cli2.Option;
@@ -5,6 +22,9 @@
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
public class DefaultOptionCreator {
+ private DefaultOptionCreator() {
+ }
+
/**
* Returns a default command line option for convergence delta specification.
*/
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdNeighborhoodTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdNeighborhoodTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdNeighborhoodTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdNeighborhoodTest.java Fri Oct 9 16:48:09 2009
@@ -30,7 +30,7 @@
long[] neighborhood =
new ThresholdUserNeighborhood(1.0, new DummySimilarity(dataModel), dataModel).getUserNeighborhood(1);
assertNotNull(neighborhood);
- assertTrue(neighborhood.length == 0);
+ assertEquals(0, neighborhood.length);
long[] neighborhood2 =
new ThresholdUserNeighborhood(0.8, new DummySimilarity(dataModel), dataModel).getUserNeighborhood(1);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarityTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarityTest.java Fri Oct 9 16:48:09 2009
@@ -67,7 +67,7 @@
assertCorrelationEquals(-0.5, correlation);
}
- public void testRefresh() throws TasteException {
+ public void testRefresh() {
// Make sure this doesn't throw an exception
new SpearmanCorrelationSimilarity(getDataModel()).refresh(null);
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java Fri Oct 9 16:48:09 2009
@@ -24,7 +24,6 @@
import org.apache.mahout.classifier.bayes.datastore.InMemoryBayesDatastore;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
-import org.apache.mahout.classifier.bayes.interfaces.Datastore;
import org.apache.mahout.classifier.bayes.model.ClassifierContext;
public class BayesClassifierTest extends TestCase {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Fri Oct 9 16:48:09 2009
@@ -345,8 +345,7 @@
int i = 0;
for (String key : keys) {
List<Canopy> data = collector.getValue(key);
- assertTrue(manhattanCentroids.get(i).asFormatString() + " is not equal to " + data.get(0).computeCentroid().asFormatString(),
- manhattanCentroids.get(i).equals(data.get(0).computeCentroid()));
+ assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to " + data.get(0).computeCentroid().asFormatString(), manhattanCentroids.get(i), data.get(0).computeCentroid());
i++;
}
}
@@ -367,8 +366,7 @@
int i = 0;
for (String key : keys) {
List<Canopy> data = collector.getValue(key);
- assertTrue(euclideanCentroids.get(i).asFormatString() + " is not equal to " + data.get(0).computeCentroid().asFormatString(),
- euclideanCentroids.get(i).equals(data.get(0).computeCentroid()));
+ assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to " + data.get(0).computeCentroid().asFormatString(), euclideanCentroids.get(i), data.get(0).computeCentroid());
i++;
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Fri Oct 9 16:48:09 2009
@@ -471,8 +471,8 @@
System.out.println("ref= " + key.toString() + " cluster= "
+ cluster.toString());
cluster.recomputeCenter();
- assertTrue("key center: " + key.getCenter().asFormatString() + " does not equal cluster: " +
- cluster.getCenter().asFormatString(), key.getCenter().equals(cluster.getCenter()));
+ assertEquals("key center: " + key.getCenter().asFormatString() + " does not equal cluster: " +
+ cluster.getCenter().asFormatString(), key.getCenter(), cluster.getCenter());
}
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Fri Oct 9 16:48:09 2009
@@ -17,7 +17,6 @@
package org.apache.mahout.clustering.kmeans;
-
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -87,7 +86,7 @@
* @param measure the DistanceMeasure to use
* @param maxIter the maximum number of iterations
*/
- private void referenceKmeans(List<Vector> points, List<Cluster> clusters,
+ private static void referenceKmeans(List<Vector> points, List<Cluster> clusters,
DistanceMeasure measure, int maxIter) {
boolean converged = false;
int iteration = 0;
@@ -342,7 +341,7 @@
//Since we aren't roundtripping through Writable, we need to compare the reference center with the cluster centroid
cluster.recomputeCenter();
assertTrue(i + " reference center: " + ref.getCenter().asFormatString() + " and cluster center: "
- + cluster.getCenter().asFormatString() + " are not equal", AbstractVector.equivalent(ref.getCenter(), cluster.getCenter()));
+ + cluster.getCenter().asFormatString() + " are not equal", AbstractVector.equivalent(ref.getCenter(), cluster.getCenter()));
/*assertEquals(k + " center[" + key + "][1]", ref.getCenter().get(1),
cluster.getCenter().get(1));*/
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java Fri Oct 9 16:48:09 2009
@@ -57,7 +57,7 @@
compare(distanceMeasure, vectors);
}
- private void compare(DistanceMeasure distanceMeasure, Vector[] vectors) {
+ private static void compare(DistanceMeasure distanceMeasure, Vector[] vectors) {
double[][] distanceMatrix = new double[3][3];
for (int a = 0; a < 3; a++) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestDenseVector.java Fri Oct 9 16:48:09 2009
@@ -113,10 +113,10 @@
int i = 0;
while (nzIter.hasNext()) {
Vector.Element elt = nzIter.next();
- assertTrue((elt.index()) + " Value: " + values[elt.index()] + " does not equal: " + elt.get(), values[elt.index()] == elt.get());
+ assertEquals((elt.index()) + " Value: " + values[elt.index()] + " does not equal: " + elt.get(), values[elt.index()], elt.get(), 0.0);
i++;
}
- assertTrue(i + " does not equal: " + expectedNum, i == expectedNum);
+ assertEquals(i + " does not equal: " + expectedNum, i, expectedNum);
}
public void testSize() throws Exception {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestOrderedIntDoubleMapping.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestOrderedIntDoubleMapping.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestOrderedIntDoubleMapping.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestOrderedIntDoubleMapping.java Fri Oct 9 16:48:09 2009
@@ -58,7 +58,7 @@
OrderedIntDoubleMapping mapping = new OrderedIntDoubleMapping(1);
mapping.set(0, 1.1);
mapping.set(5, 6.6);
- OrderedIntDoubleMapping clone = (OrderedIntDoubleMapping) mapping.clone();
+ OrderedIntDoubleMapping clone = mapping.clone();
assertEquals(2, clone.getNumMappings());
assertEquals(1.1, clone.get(0));
assertEquals(0.0, clone.get(1));
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java Fri Oct 9 16:48:09 2009
@@ -83,8 +83,8 @@
private static void checkIterator(Iterator<Vector.Element> nzIter, double[] values) {
while (nzIter.hasNext()) {
Vector.Element elt = nzIter.next();
- assertTrue((elt.index()) + " Value: " + values[elt.index()]
- + " does not equal: " + elt.get(), values[elt.index()] == elt.get());
+ assertEquals((elt.index()) + " Value: " + values[elt.index()]
+ + " does not equal: " + elt.get(), values[elt.index()], elt.get(), 0.0);
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorView.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorView.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorView.java Fri Oct 9 16:48:09 2009
@@ -91,8 +91,8 @@
int i = 0;
while (iter.hasNext()) {
Vector.Element elt = iter.next();
- assertTrue((elt.index()) + " Value: " + gold[i]
- + " does not equal: " + elt.get(), gold[i] == elt.get());
+ assertEquals((elt.index()) + " Value: " + gold[i]
+ + " does not equal: " + elt.get(), gold[i], elt.get(), 0.0);
i++;
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java Fri Oct 9 16:48:09 2009
@@ -51,43 +51,40 @@
right.setQuick(1, 2);
right.setQuick(2, 3);
assertTrue("equivalent didn't work", AbstractVector.equivalent(left, right));
- assertTrue("equals didn't work", left.equals(right));
- assertTrue("equivalent didn't work", AbstractVector.strictEquivalence(left, right) == false);
+ assertEquals("equals didn't work", left, right);
+ assertEquals("equivalent didn't work", false, AbstractVector.strictEquivalence(left, right));
DenseVector leftBar = new DenseVector("bar", 3);
leftBar.setQuick(0, 1);
leftBar.setQuick(1, 2);
leftBar.setQuick(2, 3);
assertTrue("equivalent didn't work", AbstractVector.equivalent(leftBar, right));
- assertTrue("equals didn't work", leftBar.equals(right) == false);
- assertTrue("equivalent didn't work", AbstractVector.strictEquivalence(left, right) == false);
+ assertEquals("equals didn't work", false, leftBar.equals(right));
+ assertEquals("equivalent didn't work", false, AbstractVector.strictEquivalence(left, right));
SparseVector rightBar = new SparseVector("bar", 3);
rightBar.setQuick(0, 1);
rightBar.setQuick(1, 2);
rightBar.setQuick(2, 3);
assertTrue("equivalent didn't work", AbstractVector.equivalent(left, rightBar));
- assertTrue("equals didn't work", left.equals(rightBar) == false);
- assertTrue("equivalent didn't work", AbstractVector.strictEquivalence(left, rightBar) == false);
+ assertEquals("equals didn't work", false, left.equals(rightBar));
+ assertEquals("equivalent didn't work", false, AbstractVector.strictEquivalence(left, rightBar));
right.setQuick(2, 4);
- assertTrue("equivalent didn't work",
- AbstractVector.equivalent(left, right) == false);
- assertTrue("equals didn't work", left.equals(right) == false);
+ assertEquals("equivalent didn't work", false, AbstractVector.equivalent(left, right));
+ assertEquals("equals didn't work", false, left.equals(right));
right = new DenseVector(4);
right.setQuick(0, 1);
right.setQuick(1, 2);
right.setQuick(2, 3);
right.setQuick(3, 3);
- assertTrue("equivalent didn't work",
- AbstractVector.equivalent(left, right) == false);
- assertTrue("equals didn't work", left.equals(right) == false);
+ assertEquals("equivalent didn't work", false, AbstractVector.equivalent(left, right));
+ assertEquals("equals didn't work", false, left.equals(right));
left = new SparseVector(2);
left.setQuick(0, 1);
left.setQuick(1, 2);
- assertTrue("equivalent didn't work",
- AbstractVector.equivalent(left, right) == false);
- assertTrue("equals didn't work", left.equals(right) == false);
+ assertEquals("equivalent didn't work", false, AbstractVector.equivalent(left, right));
+ assertEquals("equals didn't work", false, left.equals(right));
DenseVector dense = new DenseVector(3);
right = new DenseVector(3);
@@ -97,9 +94,9 @@
dense.setQuick(0, 1);
dense.setQuick(1, 2);
dense.setQuick(2, 3);
- assertTrue("equivalent didn't work", AbstractVector
- .equivalent(dense, right) == true);
- assertTrue("equals didn't work", dense.equals(right) == true);
+ assertEquals("equivalent didn't work", true, AbstractVector
+ .equivalent(dense, right));
+ assertEquals("equals didn't work", true, dense.equals(right));
SparseVector sparse = new SparseVector(3);
left = new SparseVector(3);
@@ -109,21 +106,19 @@
left.setQuick(0, 1);
left.setQuick(1, 2);
left.setQuick(2, 3);
- assertTrue("equivalent didn't work", AbstractVector
- .equivalent(sparse, left) == true);
- assertTrue("equals didn't work", left.equals(sparse) == true);
+ assertEquals("equivalent didn't work", true, AbstractVector
+ .equivalent(sparse, left));
+ assertEquals("equals didn't work", true, left.equals(sparse));
VectorView v1 = new VectorView(left, 0, 2);
VectorView v2 = new VectorView(right, 0, 2);
- assertTrue("equivalent didn't work",
- AbstractVector.equivalent(v1, v2) == true);
- assertTrue("equals didn't work", v1.equals(v2) == true);
+ assertEquals("equivalent didn't work", true, AbstractVector.equivalent(v1, v2));
+ assertEquals("equals didn't work", true, v1.equals(v2));
sparse = new SparseVector(2);
sparse.setQuick(0, 1);
sparse.setQuick(1, 2);
- assertTrue("equivalent didn't work",
- AbstractVector.equivalent(v1, sparse) == true);
- assertTrue("equals didn't work", v1.equals(sparse) == true);
+ assertEquals("equivalent didn't work", true, AbstractVector.equivalent(v1, sparse));
+ assertEquals("equals didn't work", true, v1.equals(sparse));
}
@@ -139,7 +134,7 @@
String formattedString = left.asFormatString();
System.out.println("Vec: " + formattedString);
Vector vec = AbstractVector.decodeVector(formattedString);
- assertTrue("vec is null and it shouldn't be", vec != null);
+ assertNotNull("vec is null and it shouldn't be", vec);
assertTrue("Vector could not be decoded from the formatString",
AbstractVector.equivalent(vec, left));
}
@@ -151,22 +146,22 @@
vec1.setQuick(1, 2);
vec1.setQuick(2, 3);
Vector norm = vec1.normalize();
- assertTrue("norm1 is null and it shouldn't be", norm != null);
+ assertNotNull("norm1 is null and it shouldn't be", norm);
Vector expected = new SparseVector(3);
expected.setQuick(0, 0.2672612419124244);
expected.setQuick(1, 0.5345224838248488);
expected.setQuick(2, 0.8017837257372732);
- assertTrue("norm is not equal to expected", norm.equals(expected));
+ assertEquals("norm is not equal to expected", norm, expected);
norm = vec1.normalize(2);
- assertTrue("norm is not equal to expected", norm.equals(expected));
+ assertEquals("norm is not equal to expected", norm, expected);
norm = vec1.normalize(1);
expected.setQuick(0, 1.0 / 6);
expected.setQuick(1, 2.0 / 6);
expected.setQuick(2, 3.0 / 6);
- assertTrue("norm is not equal to expected", norm.equals(expected));
+ assertEquals("norm is not equal to expected", norm, expected);
norm = vec1.normalize(3);
// TODO this is not used
expected = vec1.times(vec1).times(vec1);
@@ -176,28 +171,28 @@
double cube = Math.pow(36, 1.0 / 3);
expected = vec1.divide(cube);
- assertTrue("norm: " + norm.asFormatString() + " is not equal to expected: "
- + expected.asFormatString(), norm.equals(expected));
+ assertEquals("norm: " + norm.asFormatString() + " is not equal to expected: "
+ + expected.asFormatString(), norm, expected);
norm = vec1.normalize(Double.POSITIVE_INFINITY);
// The max is 3, so we divide by that.
expected.setQuick(0, 1.0 / 3);
expected.setQuick(1, 2.0 / 3);
expected.setQuick(2, 3.0 / 3);
- assertTrue("norm: " + norm.asFormatString() + " is not equal to expected: "
- + expected.asFormatString(), norm.equals(expected));
+ assertEquals("norm: " + norm.asFormatString() + " is not equal to expected: "
+ + expected.asFormatString(), norm, expected);
norm = vec1.normalize(0);
// The max is 3, so we divide by that.
expected.setQuick(0, 1.0 / 3);
expected.setQuick(1, 2.0 / 3);
expected.setQuick(2, 3.0 / 3);
- assertTrue("norm: " + norm.asFormatString() + " is not equal to expected: "
- + expected.asFormatString(), norm.equals(expected));
+ assertEquals("norm: " + norm.asFormatString() + " is not equal to expected: "
+ + expected.asFormatString(), norm, expected);
try {
vec1.normalize(-1);
- assertTrue(false);
+ fail();
} catch (IllegalArgumentException e) {
// expected
}
@@ -212,10 +207,10 @@
vec1.setQuick(2, 2);
double max = vec1.maxValue();
- assertTrue(max + " does not equal: " + 3, max == 3);
+ assertEquals(max + " does not equal: " + 3, 3, max, 0.0);
int idx = vec1.maxValueIndex();
- assertTrue(idx + " does not equal: " + 1, idx == 1);
+ assertEquals(idx + " does not equal: " + 1, 1, idx);
}
@@ -360,13 +355,13 @@
String formatString = test.asFormatString();
Vector decode = AbstractVector.decodeVector(formatString);
- assertTrue("test and decode are not equal", test.equals(decode));
+ assertEquals("test and decode are not equal", test, decode);
Vector noName = new DenseVector(values);
formatString = noName.asFormatString();
decode = AbstractVector.decodeVector(formatString);
- assertTrue("noName and decode are not equal", noName.equals(decode));
+ assertEquals("noName and decode are not equal", noName, decode);
}
public void testLabelSerializationSparse() {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java Fri Oct 9 16:48:09 2009
@@ -96,7 +96,7 @@
});
}
- public static void main(String[] args) throws IOException {
+ public static void main(String[] args) {
RandomUtils.useTestSeed();
generateSamples();
new DisplayDirichlet();
@@ -126,7 +126,7 @@
// plot the axes
g2.setColor(Color.BLACK);
- Vector dv = new DenseVector(2).assign(size / 2);
+ Vector dv = new DenseVector(2).assign(size / 2.0);
plotRectangle(g2, new DenseVector(2).assign(2), dv);
plotRectangle(g2, new DenseVector(2).assign(-2), dv);
@@ -143,7 +143,7 @@
* @param v a Vector of rectangle centers
* @param dv a Vector of rectangle sizes
*/
- public void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
+ public static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
double[] flip = { 1, -1 };
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
@@ -160,7 +160,7 @@
* @param v a Vector of rectangle centers
* @param dv a Vector of rectangle sizes
*/
- public void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
+ public static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
double[] flip = { 1, -1 };
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Fri Oct 9 16:48:09 2009
@@ -97,7 +97,6 @@
*/
private static boolean iterateReference(List<Vector> points,
List<Cluster> clusters, DistanceMeasure measure) {
- boolean converged = true;
// iterate through all points, assigning each to the nearest cluster
for (Vector point : points) {
Cluster closestCluster = null;
@@ -112,6 +111,7 @@
closestCluster.addPoint(point);
}
// test for convergence
+ boolean converged = true;
for (Cluster cluster : clusters) {
if (!cluster.computeConvergence())
converged = false;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Fri Oct 9 16:48:09 2009
@@ -50,7 +50,7 @@
// plot the axes
g2.setColor(Color.BLACK);
- Vector dv = new DenseVector(2).assign(size / 2);
+ Vector dv = new DenseVector(2).assign(size / 2.0);
Vector dv1 = new DenseVector(2).assign(MeanShiftCanopy.t1);
Vector dv2 = new DenseVector(2).assign(MeanShiftCanopy.t2);
plotRectangle(g2, new DenseVector(2).assign(2), dv);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Fri Oct 9 16:48:09 2009
@@ -17,8 +17,6 @@
package org.apache.mahout.clustering.syntheticcontrol.canopy;
-import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
-
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
@@ -26,6 +24,7 @@
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
+import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.matrix.SparseVector;
@@ -75,7 +74,7 @@
FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
if (dfs.exists(outPath))
dfs.delete(outPath, true);
- final String directoryContainingConvertedInput = output + DIRECTORY_CONTAINING_CONVERTED_INPUT;
+ final String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
InputDriver.runJob(input, directoryContainingConvertedInput, vectorClass);
CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClassName,
t1, t2, vectorClass);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java Fri Oct 9 16:48:09 2009
@@ -32,6 +32,7 @@
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.DefaultTreeBuilder;
import org.apache.mahout.df.callback.ForestPredictions;
import org.apache.mahout.df.callback.MeanTreeCollector;
@@ -77,21 +78,12 @@
* @param nbtrees number of trees to grow
* @throws Exception if an error occurred while growing the trees
*/
- protected static void runIteration(Data data, int m, int nbtrees)
- throws Exception {
+ protected static void runIteration(Data data, int m, int nbtrees) {
final int dataSize = data.size();
final int nblabels = data.getDataset().nblabels();
- double oobM; // oob error estimate when m = log2(M)+1
- double oobOne; // oob error estimate when m = 1
-
- ForestPredictions errorM; // oob error when using m = log2(M)+1
- ForestPredictions errorOne; // oob error when using m = 1
- ForestPredictions testError; // test set error
- MeanTreeCollector treeError; // mean tree error
-
- Random rng = new Random(1L);
+ Random rng = RandomUtils.getRandom();
Data train = data.clone();
Data test = train.rsplit(rng, (int) (data.size() * 0.1));
@@ -103,21 +95,19 @@
SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);
- long time;
-
// grow a forest with m = log2(M)+1
- errorM = new ForestPredictions(dataSize, nblabels);
+ ForestPredictions errorM = new ForestPredictions(dataSize, nblabels); // oob error when using m = log2(M)+1
treeBuilder.setM(m);
- time = System.currentTimeMillis();
+ long time = System.currentTimeMillis();
log.info("Growing a forest with m=" + m);
DecisionForest forestM = forestBuilder.build(nbtrees, errorM);
sumTimeM += System.currentTimeMillis() - time;
- oobM = ErrorEstimate.errorRate(trainLabels, errorM.computePredictions(rng));
+ double oobM = ErrorEstimate.errorRate(trainLabels, errorM.computePredictions(rng)); // oob error estimate when m = log2(M)+1
// grow a forest with m=1
- errorOne = new ForestPredictions(dataSize, nblabels);
+ ForestPredictions errorOne = new ForestPredictions(dataSize, nblabels); // oob error when using m = 1
treeBuilder.setM(1);
time = System.currentTimeMillis();
@@ -125,12 +115,12 @@
DecisionForest forestOne = forestBuilder.build(nbtrees, errorOne);
sumTimeOne += System.currentTimeMillis() - time;
- oobOne = ErrorEstimate.errorRate(trainLabels, errorOne.computePredictions(rng));
+ double oobOne = ErrorEstimate.errorRate(trainLabels, errorOne.computePredictions(rng)); // oob error estimate when m = 1
// compute the test set error (Selection Error), and mean tree error (One Tree Error),
// using the lowest oob error forest
- testError = new ForestPredictions(dataSize, nblabels);
- treeError = new MeanTreeCollector(train, nbtrees);
+ ForestPredictions testError = new ForestPredictions(dataSize, nblabels); // test set error
+ MeanTreeCollector treeError = new MeanTreeCollector(train, nbtrees); // mean tree error
// compute the test set error using m=1 (Single Input Error)
errorOne = new ForestPredictions(dataSize, nblabels);
@@ -153,6 +143,7 @@
System.exit(res);
}
+ @Override
public int run(String[] args) throws Exception {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java Fri Oct 9 16:48:09 2009
@@ -203,7 +203,7 @@
return forest;
}
- protected Data loadData(Configuration conf, Path dataPath, Dataset dataset)
+ protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset)
throws Exception {
log.info("Loading the data...");
FileSystem fs = dataPath.getFileSystem(conf);
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombinerTest.java Fri Oct 9 16:48:09 2009
@@ -74,7 +74,7 @@
assertEquals(3, nbvalues);
}
- List<Text> asList(String... strings) {
+ static List<Text> asList(String... strings) {
List<Text> values = new ArrayList<Text>();
for (String value : strings) {
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducerTest.java Fri Oct 9 16:48:09 2009
@@ -73,7 +73,7 @@
assertTrue(expected.containsAll(actual));
}
- List<Text> asList(String... strings) {
+ static List<Text> asList(String... strings) {
List<Text> values = new ArrayList<Text>();
for (String value : strings) {
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java Fri Oct 9 16:48:09 2009
@@ -26,8 +26,7 @@
/**
- * Reads in a file containing {@link org.apache.mahout.matrix.Vector}s and provides
- * a {@link org.apache.mahout.utils.vectors.VectorIterable} interface to them.
+ * Reads in a file containing {@link org.apache.mahout.matrix.Vector}s.
* <p/>
* The key is any {@link org.apache.hadoop.io.Writable} and the value is a {@link org.apache.mahout.matrix.Vector}.
* It can handle any class that implements Vector as long as it has a no-arg constructor.
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java Fri Oct 9 16:48:09 2009
@@ -64,10 +64,9 @@
/**
* Does NOT close the underlying writer
- * @throws IOException
*/
@Override
- public void close() throws IOException {
+ public void close() {
}
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/TermInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/TermInfoWriter.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/TermInfoWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/TermInfoWriter.java Fri Oct 9 16:48:09 2009
@@ -25,5 +25,5 @@
void write(TermInfo ti) throws IOException;
- void close() throws IOException;
+ void close();
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java Fri Oct 9 16:48:09 2009
@@ -24,7 +24,6 @@
public interface VectorWriter {
/**
* Write all values in the Iterable to the output
- * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
* @return the number of docs written
* @throws IOException if there was a problem writing
*
@@ -34,7 +33,6 @@
/**
* Write the first <code>maxDocs</code> to the output.
- * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
* @param maxDocs the maximum number of docs to write
* @return The number of docs written
* @throws IOException if there was a problem writing
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java?rev=823611&r1=823610&r2=823611&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java Fri Oct 9 16:48:09 2009
@@ -31,7 +31,7 @@
*/
public class TFDFMapper extends VectorMapper {
- public static final int DEFAULT_CACHE_SIZE = 256;
+ //public static final int DEFAULT_CACHE_SIZE = 256;
private final IndexReader reader; // TODO never used?
private Vector vector;