You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/24 22:16:40 UTC
svn commit: r807361 [1/2] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/
core/src/main/java/org/apache/mahout/classifier/bayes/
core/src/main/java/org/apache/mahout/classifier/cbayes/
core/src/main/java/org/a...
Author: srowen
Date: Mon Aug 24 20:16:37 2009
New Revision: 807361
URL: http://svn.apache.org/viewvc?rev=807361&view=rev
Log:
MAHOUT-166
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java Mon Aug 24 20:16:37 2009
@@ -35,7 +35,6 @@
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
-import java.util.concurrent.atomic.AtomicInteger;
/**
* <p>A {@link Recommender} which caches the results from another {@link Recommender} in memory. Results are held by
@@ -46,7 +45,7 @@
private static final Logger log = LoggerFactory.getLogger(CachingRecommender.class);
private final Recommender recommender;
- private final AtomicInteger maxHowMany;
+ private final int[] maxHowMany;
private final Cache<Long, Recommendations> recommendationCache;
private final Cache<LongPair, Float> estimatedPrefCache;
private final RefreshHelper refreshHelper;
@@ -57,7 +56,7 @@
throw new IllegalArgumentException("recommender is null");
}
this.recommender = recommender;
- this.maxHowMany = new AtomicInteger(1);
+ this.maxHowMany = new int[] {1};
// Use "num users" as an upper limit on cache size. Rough guess.
int numUsers = recommender.getDataModel().getNumUsers();
this.recommendationCache =
@@ -105,8 +104,8 @@
}
synchronized (maxHowMany) {
- if (howMany > maxHowMany.get()) {
- maxHowMany.set(howMany);
+ if (howMany > maxHowMany[0]) {
+ maxHowMany[0] = howMany;
}
}
@@ -186,7 +185,7 @@
@Override
public Recommendations get(Long key) throws TasteException {
log.debug("Retrieving new recommendations for user ID '{}'", key);
- int howMany = maxHowMany.get();
+ int howMany = maxHowMany[0];
Rescorer<Long> rescorer = getCurrentRescorer();
List<RecommendedItem> recommendations = rescorer == null ?
recommender.recommend(key, howMany) :
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java Mon Aug 24 20:16:37 2009
@@ -56,7 +56,8 @@
String labelFeaturePair = key.toString();
double alpha_i = 1.0;
- String label = labelFeaturePair.split(",")[0];
+ int comma = labelFeaturePair.indexOf(',');
+ String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
double weight = Math.log((value.get() + alpha_i) / (labelWeightSum.get(label) + vocabCount));
output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java Mon Aug 24 20:16:37 2009
@@ -52,7 +52,8 @@
String labelFeaturePair = key.toString();
- String label = labelFeaturePair.split(",")[0];
+ int comma = labelFeaturePair.indexOf(',');
+ String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
output.collect(key, new DoubleWritable(-Math.log(value.get()) / thetaNormalizer.get(label)));// output -D_ij
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java Mon Aug 24 20:16:37 2009
@@ -63,7 +63,8 @@
output.collect(new Text((stringDoubleEntry.getKey() + ',' + feature).trim()), weight); //output Sigma_j
}
} else {
- String label = labelFeaturePair.split(",")[0];
+ int comma = labelFeaturePair.indexOf(',');
+ String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
double inverseDenominator = 1.0 / (sigma_jSigma_k - labelWeightSum.get(label) + vocabCount);
DoubleWritable weight = new DoubleWritable(-value.get() * inverseDenominator);
output.collect(key, weight);//output -D_ij
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java Mon Aug 24 20:16:37 2009
@@ -63,7 +63,8 @@
}
} else {
- String label = labelFeaturePair.split(",")[0];
+ int comma = labelFeaturePair.indexOf(',');
+ String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
double D_ij = value.get();
double denominator = 0.5 * ((sigma_jSigma_k / vocabCount) + (D_ij * this.labelWeightSum.size()));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.clustering;
import org.apache.hadoop.io.Writable;
@@ -8,10 +25,6 @@
import java.io.DataOutput;
import java.io.IOException;
-/**
- *
- *
- **/
public abstract class ClusterBase implements Writable {
// this cluster's clusterId
protected int id;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
import org.apache.mahout.matrix.DenseVector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
import org.apache.mahout.matrix.DenseVector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.uncommons.maths.random.GaussianGenerator;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet.models;
+
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java Mon Aug 24 20:16:37 2009
@@ -1,7 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
-import org.apache.hadoop.io.Writable;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,6 +15,10 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet.models;
+
+import org.apache.hadoop.io.Writable;
+
/**
* A model is a probability distribution over observed data points and allows the probability of any data point to be
* computed.
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet.models;
+
/** A model distribution allows us to sample a model from its prior distribution. */
public interface ModelDistribution<O> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet.models;
+
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet.models;
+
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.fuzzykmeans;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,7 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.fuzzykmeans;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.kmeans;
+
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.kmeans;
+
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Mon Aug 24 20:16:37 2009
@@ -68,7 +68,7 @@
@Override
public boolean equals(Object o) {
- if (!(o instanceof String)) {
+ if (!(o instanceof StringDoublePair)) {
return false;
}
StringDoublePair other = (StringDoublePair) o;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java Mon Aug 24 20:16:37 2009
@@ -292,9 +292,12 @@
@Override
public int hashCode() {
- int result = (values != null ? values.hashCode() : 0);
- result = 31 * result + name.hashCode();
-
+ int result = name.hashCode();
+ if (values != null) {
+ for (double value : values) {
+ result = 31 * result + (int) Double.doubleToLongBits(value);
+ }
+ }
return result;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils;
+
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.matrix.CardinalityException;
import org.apache.mahout.matrix.Vector;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java Mon Aug 24 20:16:37 2009
@@ -19,6 +19,8 @@
import com.thoughtworks.xstream.XStream;
+import java.util.regex.Pattern;
+
/**
* Offers two methods to convert an object to a string representation and restore the object given its string
* representation. Should use Hadoop Stringifier whenever available.
@@ -26,6 +28,7 @@
public final class StringUtils {
private static final XStream xstream = new XStream();
+ private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
private StringUtils() {
// do nothing
@@ -38,7 +41,7 @@
* @return the string representation of the object
*/
public static String toString(Object obj) {
- return xstream.toXML(obj).replaceAll("\n", "");
+ return NEWLINE_PATTERN.matcher(xstream.toXML(obj)).replaceAll("");
}
/**
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import junit.framework.TestCase;
import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution;
import org.apache.mahout.clustering.dirichlet.models.Model;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import junit.framework.TestCase;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.lda;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,15 +15,16 @@
* limitations under the License.
*/
-import java.util.ArrayList;
+package org.apache.mahout.clustering.lda;
+
import java.util.Iterator;
-import java.util.List;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.commons.math.distribution.PoissonDistribution;
import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math.MathException;
import org.apache.mahout.matrix.DenseMatrix;
import org.apache.mahout.matrix.DenseVector;
@@ -34,14 +33,14 @@
public class TestLDAInference extends TestCase {
- private Random random;
+ private static final int NUM_TOPICS = 20;
- private static int NUM_TOPICS = 20;
+ private Random random;
@Override
protected void setUp() throws Exception {
super.setUp();
- random = new Random();
+ random = new Random(0xCAFEBABECAFEBABEL);
}
/**
@@ -49,17 +48,12 @@
* @param numWords int number of words in the vocabulary
* @param numWords E[count] for each word
*/
- private Vector generateRandomDoc(int numWords, double sparsity) {
+ private Vector generateRandomDoc(int numWords, double sparsity) throws MathException {
Vector v = new DenseVector(numWords);
- try {
- PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
- for (int i = 0; i < numWords; i++) {
- // random integer
- v.setQuick(i, dist.inverseCumulativeProbability(random.nextDouble()) + 1);
- }
- } catch (Exception e) {
- e.printStackTrace();
- fail("Caught " + e.toString());
+ PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
+ for (int i = 0; i < numWords; i++) {
+ // random integer
+ v.setQuick(i, dist.inverseCumulativeProbability(random.nextDouble()) + 1);
}
return v;
}
@@ -68,13 +62,13 @@
double topicSmoothing = 50.0 / numTopics; // whatever
Matrix m = new DenseMatrix(numTopics, numWords);
double[] logTotals = new double[numTopics];
- double ll = Double.NEGATIVE_INFINITY;
+ double ll = Double.NEGATIVE_INFINITY; // TODO this is not updated in loop?
for (int k = 0; k < numTopics; ++k) {
double total = 0.0; // total number of pseudo counts we made
for (int w = 0; w < numWords; ++w) {
// A small amount of random noise, minimized by having a floor.
- double pseudocount = random.nextDouble() + 1E-10;
+ double pseudocount = random.nextDouble() + 1.0E-10;
total += pseudocount;
m.setQuick(k, w, Math.log(pseudocount));
}
@@ -86,7 +80,7 @@
}
- private void runTest(int numWords, double sparsity, int numTests) {
+ private void runTest(int numWords, double sparsity, int numTests) throws MathException {
LDAState state = generateRandomState(numWords, NUM_TOPICS);
LDAInference lda = new LDAInference(state);
for (int t = 0; t < numTests; ++t) {
@@ -103,20 +97,20 @@
assertTrue(k + " " + w + " logProb " + logProb, logProb <= 0.0);
}
}
- assertTrue("log likelihood", doc.logLikelihood <= 1E-10);
+ assertTrue("log likelihood", doc.logLikelihood <= 1.0E-10);
}
}
- public void testLDAEasy() {
- runTest(10, 1, 5); // 1 word per doc in expectation
+ public void testLDAEasy() throws MathException {
+ runTest(10, 1.0, 5); // 1 word per doc in expectation
}
- public void testLDASparse() {
+ public void testLDASparse() throws MathException {
runTest(100, 0.4, 5); // 40 words per doc in expectation
}
- public void testLDADense() {
- runTest(100, 3, 5); // 300 words per doc in expectation
+ public void testLDADense() throws MathException {
+ runTest(100, 3.0, 5); // 300 words per doc in expectation
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java Mon Aug 24 20:16:37 2009
@@ -17,10 +17,7 @@
package org.apache.mahout.clustering.lda;
import java.io.File;
-import java.util.ArrayList;
import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
import java.util.Random;
import junit.framework.TestCase;
@@ -28,20 +25,20 @@
import org.apache.commons.math.distribution.PoissonDistribution;
import org.apache.commons.math.distribution.PoissonDistributionImpl;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
+import org.apache.commons.math.MathException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.mahout.matrix.DenseMatrix;
import org.apache.mahout.matrix.Matrix;
import org.apache.mahout.matrix.SparseVector;
import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.utils.DummyOutputCollector;
import static org.easymock.classextension.EasyMock.*;
public class TestMapReduce extends TestCase {
+ private static final int NUM_TESTS = 10;
+ private static final int NUM_TOPICS = 10;
private Random random;
@@ -50,17 +47,12 @@
* @param numWords int number of words in the vocabulary
* @param numWords E[count] for each word
*/
- private SparseVector generateRandomDoc(int numWords, double sparsity) {
+ private SparseVector generateRandomDoc(int numWords, double sparsity) throws MathException {
SparseVector v = new SparseVector(numWords,(int)(numWords * sparsity));
- try {
- PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
- for (int i = 0; i < numWords; i++) {
- // random integer
- v.set(i,dist.inverseCumulativeProbability(random.nextDouble()) + 1);
- }
- } catch(Exception e) {
- e.printStackTrace();
- fail("Caught " + e.toString());
+ PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
+ for (int i = 0; i < numWords; i++) {
+ // random integer
+ v.set(i,dist.inverseCumulativeProbability(random.nextDouble()) + 1);
}
return v;
}
@@ -69,12 +61,12 @@
double topicSmoothing = 50.0 / numTopics; // whatever
Matrix m = new DenseMatrix(numTopics,numWords);
double[] logTotals = new double[numTopics];
- double ll = Double.NEGATIVE_INFINITY;
+ double ll = Double.NEGATIVE_INFINITY; // TODO this is not updated in loop?
for(int k = 0; k < numTopics; ++k) {
double total = 0.0; // total number of pseudo counts we made
for(int w = 0; w < numWords; ++w) {
// A small amount of random noise, minimized by having a floor.
- double pseudocount = random.nextDouble() + 1E-10;
+ double pseudocount = random.nextDouble() + 1.0E-10;
total += pseudocount;
m.setQuick(k,w,Math.log(pseudocount));
}
@@ -88,14 +80,11 @@
@Override
protected void setUp() throws Exception {
super.setUp();
+ random = new Random(0xCAFEBABECAFEBABEL);
File f = new File("input");
- random = new Random();
f.mkdir();
}
- private static int NUM_TESTS = 10;
- private static int NUM_TOPICS = 10;
-
/**
* Test the basic Mapper
*
@@ -120,7 +109,7 @@
}
}
- private int numNonZero(Vector v) {
+ private static int numNonZero(Vector v) {
int count = 0;
for(Iterator<Vector.Element> iter = v.iterateNonZero();
iter.hasNext();iter.next() ) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java Mon Aug 24 20:16:37 2009
@@ -30,7 +30,7 @@
*/
public class DummyEvaluator implements FitnessEvaluator<DummyCandidate> {
- private final Random rng = new Random();
+ private final Random rng = new Random(0xCAFEBABECAFEBABEL);
private static final Map<Integer, Double> evaluations = new HashMap<Integer, Double>();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java Mon Aug 24 20:16:37 2009
@@ -250,7 +250,7 @@
}
/*public void testSparseVectorTimesX() {
- Random rnd = new Random(0xDEADBEEFL);
+ Random rnd = new Random(0xCAFEBABECAFEBABEL);
Vector v1 = randomSparseVector(rnd);
double x = rnd.nextDouble();
long t0 = System.currentTimeMillis();
@@ -274,7 +274,7 @@
}*/
/*public void testSparseVectorTimesV() {
- Random rnd = new Random(0xDEADBEEFL);
+ Random rnd = new Random(0xCAFEBABECAFEBABEL);
Vector v1 = randomSparseVector(rnd);
Vector v2 = randomSparseVector(rnd);
long t0 = System.currentTimeMillis();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.analysis;
import org.apache.lucene.analysis.Analyzer;
@@ -11,14 +28,9 @@
import java.io.Reader;
-
-/**
- *
- *
- **/
public class WikipediaAnalyzer extends Analyzer {
- private CharArraySet stopSet;
+ private final CharArraySet stopSet;
public WikipediaAnalyzer() {
stopSet = (CharArraySet) StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
@@ -34,8 +46,6 @@
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new StopFilter(true, result, stopSet);
-
-
return result;
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java Mon Aug 24 20:16:37 2009
@@ -29,12 +29,14 @@
import java.io.PrintWriter;
import java.io.FileNotFoundException;
import java.nio.charset.Charset;
+import java.util.regex.Pattern;
/**
* See <a href="http://www.informatik.uni-freiburg.de/~cziegler/BX/BX-CSV-Dump.zip">download</a> for
* data needed by this class. The BX-Book-Ratings.csv file is needed.
*/
public final class BookCrossingDataModel extends FileDataModel {
+ private static final Pattern NON_DIGIT_SEMICOLON_PATTERN = Pattern.compile("[^0-9;]");
public BookCrossingDataModel() throws IOException {
this(GroupLensDataModel.readResourceToTempFile(
@@ -60,7 +62,7 @@
writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
for (String line : new FileLineIterable(originalFile, true)) {
// Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
- String convertedLine = line.replaceAll("[^0-9;]", "").replace(';', ',');
+ String convertedLine = NON_DIGIT_SEMICOLON_PATTERN.matcher(line).replaceAll("").replace(';', ',');
// If this means we deleted an entire ID -- few cases like that -- skip the line
if (convertedLine.contains(",,")) {
continue;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Mon Aug 24 20:16:37 2009
@@ -79,7 +79,8 @@
OutputStream os = new FileOutputStream(tempFile);
try {
int bytesRead;
- for (byte[] buffer = new byte[32768]; (bytesRead = is.read(buffer)) > 0;) {
+ byte[] buffer = new byte[32768];
+ while ((bytesRead = is.read(buffer)) > 0) {
os.write(buffer, 0, bytesRead);
}
os.flush();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 24 20:16:37 2009
@@ -51,7 +51,7 @@
* Create and run the Wikipedia Dataset Creator.
*/
public class WikipediaDatasetCreatorDriver {
- private transient static Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorDriver.class);
+ private static final Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorDriver.class);
private WikipediaDatasetCreatorDriver() {
}
@@ -96,9 +96,8 @@
Parser parser = new Parser();
parser.setGroup(group);
- CommandLine cmdLine = null;
try {
- cmdLine = parser.parse(args);
+ CommandLine cmdLine = parser.parse(args);
if (cmdLine.hasOption(helpOpt)) {
CommandLineUtil.printHelp(group);
return;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Mon Aug 24 20:16:37 2009
@@ -30,32 +30,31 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
import org.apache.mahout.analysis.WikipediaAnalyzer;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.io.IOException;
import java.io.StringReader;
-import java.util.ArrayList;
import java.util.HashSet;
-import java.util.List;
import java.util.Set;
+import java.util.regex.Pattern;
public class WikipediaDatasetCreatorMapper extends MapReduceBase implements
Mapper<LongWritable, Text, Text, Text> {
- private transient static Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorMapper.class);
+ private static final Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorMapper.class);
private static Set<String> inputCategories = null;
private static boolean exactMatchOnly = false;
private static Analyzer analyzer;
+ private static final Pattern SPACE_NON_ALPHA_PATTERN = Pattern.compile("[\\s\\W]");
+
@Override
public void map(LongWritable key, Text value,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
- StringBuilder contents = new StringBuilder();
+ StringBuilder contents = new StringBuilder();
String document = value.toString();
String catMatch = findMatchingCategory(document);
@@ -66,14 +65,14 @@
while((token = stream.next(token)) != null){
contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
}
- output.collect(new Text(catMatch.replaceAll("[\\s\\W]","_")), new Text(contents.toString()));
+ output.collect(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")), new Text(contents.toString()));
}
}
public static String findMatchingCategory(String document){
int startIndex = 0;
int categoryIndex;
- String match = null;
+ String match = null; // TODO this is never updated?
while((categoryIndex = document.indexOf("[[Category:", startIndex))!=-1)
{
categoryIndex+=11;
@@ -129,6 +128,7 @@
} catch (InstantiationException e) {
throw new RuntimeException(e);
}
- log.info("Configure: Input Categories size: " + inputCategories.size() + " Exact Match: " + exactMatchOnly + " Analyzer: " + analyzer.getClass().getName());
+ log.info("Configure: Input Categories size: " + inputCategories.size() + " Exact Match: " + exactMatchOnly +
+ " Analyzer: " + analyzer.getClass().getName());
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.canopy;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.canopy;
+
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.util.ArrayList;
@@ -37,19 +37,18 @@
this.setTitle("Canopy Clusters (> 5% of population)");
}
- private static final long serialVersionUID = 1L;
-
private static List<Canopy> canopies;
private static final double t1 = 3.0;
private static final double t2 = 1.5;
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- for (Canopy canopy : canopies)
+ for (Canopy canopy : canopies) {
if (canopy.getNumPoints() > sampleData.size() * 0.05) {
dv.assign(t1);
g2.setColor(colors[0]);
@@ -57,6 +56,7 @@
dv.assign(t2);
plotEllipse(g2, canopy.getCenter(), dv);
}
+ }
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -35,8 +35,7 @@
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -28,15 +28,14 @@
import org.apache.mahout.matrix.Vector;
class DisplayASNDirichlet extends DisplayDirichlet {
- public DisplayASNDirichlet() {
+ DisplayASNDirichlet() {
initialize();
this
.setTitle("Dirichlet Process Clusters - Asymmetric Sampled Normal Distribution (>"
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -43,8 +43,7 @@
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
@@ -109,14 +108,10 @@
}
}
- public static void main(String[] args) {
+ public static void main(String[] args) throws IOException {
UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
- try {
- getSamples();
- getResults();
- } catch (IOException e) {
- e.printStackTrace();
- }
+ getSamples();
+ getResults();
new DisplayASNOutputState();
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.clustering.dirichlet;
import java.awt.Color;
@@ -12,6 +29,7 @@
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.List;
+import java.io.IOException;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
@@ -20,7 +38,6 @@
import org.apache.mahout.matrix.Vector;
public class DisplayDirichlet extends Frame {
- private static final long serialVersionUID = 1L;
protected int res; //screen resolution
@@ -78,7 +95,7 @@
});
}
- public static void main(String[] args) {
+ public static void main(String[] args) throws IOException {
UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
generateSamples();
new DisplayDirichlet();
@@ -126,10 +143,10 @@
* @param dv a Vector of rectangle sizes
*/
public void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
- int h = size / 2;
double[] flip = { 1, -1 };
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
+ int h = size / 2;
double x = v2.get(0) + h;
double y = v2.get(1) + h;
g2.draw(new Rectangle2D.Double(x * ds, y * ds, dv.get(0) * ds, dv.get(1)
@@ -143,10 +160,10 @@
* @param dv a Vector of rectangle sizes
*/
public void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
- int h = size / 2;
double[] flip = { 1, -1 };
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
+ int h = size / 2;
double x = v2.get(0) + h;
double y = v2.get(1) + h;
g2
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -34,8 +34,7 @@
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -43,8 +43,7 @@
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
@@ -107,14 +106,10 @@
}
}
- public static void main(String[] args) {
+ public static void main(String[] args) throws IOException {
UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
- try {
- getSamples();
- getResults();
- } catch (IOException e) {
- e.printStackTrace();
- }
+ getSamples();
+ getResults();
new DisplayOutputState();
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.dirichlet;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -34,8 +34,7 @@
+ (int) (significance * 100) + "% of population)");
}
- private static final long serialVersionUID = 1L;
-
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.fuzzykmeans;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.fuzzykmeans;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -39,10 +39,6 @@
this.setTitle("Fuzzy K-Means Clusters (> 5% of population)");
}
- private static final long serialVersionUID = 1L;
-
- private static List<Canopy> canopies;
-
private static List<List<SoftCluster>> clusters;
private static final double t1 = 3.0;
@@ -58,11 +54,12 @@
for (List<SoftCluster> cls : clusters) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
g2.setColor(colors[Math.min(colors.length - 1, i--)]);
- for (SoftCluster cluster : cls)
- if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
+ for (SoftCluster cluster : cls) {
+ //if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
dv.assign(cluster.std() * 3);
plotEllipse(g2, cluster.getCenter(), dv);
- }
+ //}
+ }
}
}
@@ -171,7 +168,7 @@
generateSamples();
List<Vector> points = new ArrayList<Vector>();
points.addAll(sampleData);
- canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
+ List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
DistanceMeasure measure = new ManhattanDistanceMeasure();
Cluster.config(measure, 0.001);
clusters = new ArrayList<List<SoftCluster>>();
@@ -179,11 +176,7 @@
for (Canopy canopy : canopies)
if (canopy.getNumPoints() > 0.05 * sampleData.size())
clusters.get(0).add(new SoftCluster(canopy.getCenter()));
- try {
- referenceFuzzyKMeans(sampleData, measure, 0.001, 10);
- } catch (Exception e) {
- e.printStackTrace();
- }
+ referenceFuzzyKMeans(sampleData, measure, 0.001, 10);
new DisplayFuzzyKMeans();
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.kmeans;
+
import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -33,21 +33,18 @@
import org.apache.mahout.utils.ManhattanDistanceMeasure;
class DisplayKMeans extends DisplayDirichlet {
- public DisplayKMeans() {
+ DisplayKMeans() {
initialize();
this.setTitle("K-Means Clusters (> 5% of population)");
}
- private static final long serialVersionUID = 1L;
-
- private static List<Canopy> canopies;
-
private static List<List<Cluster>> clusters;
private static final double t1 = 3.0;
private static final double t2 = 1.5;
+ @Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
@@ -56,11 +53,12 @@
for (List<Cluster> cls : clusters) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
g2.setColor(colors[Math.min(colors.length - 1, i--)]);
- for (Cluster cluster : cls)
- if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
+ for (Cluster cluster : cls) {
+ //if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
dv.assign(cluster.getStd() * 3);
plotEllipse(g2, cluster.getCenter(), dv);
- }
+ //}
+ }
}
}
@@ -99,8 +97,7 @@
*/
private static boolean iterateReference(List<Vector> points,
List<Cluster> clusters, DistanceMeasure measure) {
- boolean converged;
- converged = true;
+ boolean converged = true;
// iterate through all points, assigning each to the nearest cluster
for (Vector point : points) {
Cluster closestCluster = null;
@@ -178,7 +175,7 @@
generateSamples();
List<Vector> points = new ArrayList<Vector>();
points.addAll(sampleData);
- canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
+ List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
DistanceMeasure measure = new ManhattanDistanceMeasure();
Cluster.config(measure, 0.001);
clusters = new ArrayList<List<Cluster>>();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.meanshift;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.meanshift;
+
import java.awt.Color;
import java.awt.Graphics;
import java.awt.Graphics2D;
@@ -37,8 +37,6 @@
this.setTitle("Canopy Clusters (> 1.5% of population)");
}
- private static final long serialVersionUID = 1L;
-
private static List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
private static final List<List<Vector>> iterationCenters = new ArrayList<List<Vector>>();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java Mon Aug 24 20:16:37 2009
@@ -19,15 +19,15 @@
/**
* Constants shared between examples.
*/
-public final class Constants {
+public interface Constants {
/**
* Directory containing output for examples.
*/
- public static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
+ String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
/**
* Directory used to store the input after it has been processed from it's
* original form into one suitable for processing by the clustering examples.
*/
- public static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
+ String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java Mon Aug 24 20:16:37 2009
@@ -24,33 +24,35 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
public class InputMapper extends MapReduceBase implements
Mapper<LongWritable, Text, Text, Vector> {
+
+ private static final Pattern SPACE = java.util.regex.Pattern.compile(" ");
+
protected Class<? extends Vector> outputClass;
- protected Constructor constructor;
+ protected Constructor<?> constructor;
@Override
public void map(LongWritable key, Text values,
OutputCollector<Text, Vector> output, Reporter reporter) throws IOException {
- String[] numbers = values.toString().split(" ");
+ String[] numbers = SPACE.split(values.toString());
// sometimes there are multiple separator spaces
List<Double> doubles = new ArrayList<Double>();
for (String value : numbers) {
if (value.length() > 0)
doubles.add(Double.valueOf(value));
}
- Vector result = null;//new DenseVector(doubles.size());
try {
- result = (Vector) constructor.newInstance(doubles.size());
+ Vector result = (Vector) constructor.newInstance(doubles.size());//new DenseVector(doubles.size());
int index = 0;
for (Double d : doubles)
result.set(index++, d);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
+
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java Mon Aug 24 20:16:37 2009
@@ -30,14 +30,17 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
public class InputMapper extends MapReduceBase implements
Mapper<LongWritable, Text, Text, MeanShiftCanopy> {
+ private static final Pattern SPACE = Pattern.compile(" ");
+
@Override
public void map(LongWritable key, Text values,
OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter) throws IOException {
- String[] numbers = values.toString().split(" ");
+ String[] numbers = SPACE.split(values.toString());
// sometimes there are multiple separator spaces
List<Double> doubles = new ArrayList<Double>();
for (String value : numbers) {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Mon Aug 24 20:16:37 2009
@@ -124,13 +124,13 @@
public static class RndLineRecordReader implements
RecordReader<LongWritable, Text> {
- private RecordReader<LongWritable, Text> reader;
+ private final RecordReader<LongWritable, Text> reader;
- private Random rng;
+ private final Random rng;
- private double threshold;
+ private final double threshold;
- private boolean training;
+ private final boolean training;
private final LongWritable k = new LongWritable();
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java Mon Aug 24 20:16:37 2009
@@ -31,15 +31,15 @@
* offsprings will not any common gene.
*/
public void testMate1() {
- int maxattributes = 100;
- int maxcrosspnts = 10;
- int n = 100; // repeat this test n times
Random rng = new MersenneTwisterRNG();
// Initialize dataset
DataSet dataset = EasyMock.createMock(DataSet.class);
DataSet.initialize(dataset);
+ int n = 100; // repeat this test n times
+ int maxcrosspnts = 10;
+ int maxattributes = 100;
for (int nloop = 0; nloop < n; nloop++) {
// we need at least 2 attributes for the crossover
// and a label that will be skipped by the rules
@@ -54,8 +54,8 @@
CDCrossover crossover = new CDCrossover(crosspnts);
// the parents have no gene in common
- CDRule parent0 = generate0Rule(nbattributes);
- CDRule parent1 = generate1Rule(nbattributes);
+ CDRule parent0 = generate0Rule();
+ CDRule parent1 = generate1Rule();
List<CDRule> offsprings = crossover
.mate(parent0, parent1, crosspnts, rng);
@@ -78,15 +78,15 @@
* areas.
*/
public void testMate2() {
- int maxattributes = 100;
- int maxcrosspnts = 10;
- int n = 100; // repeat this test n times
Random rng = new MersenneTwisterRNG();
// Initialize dataset
DataSet dataset = EasyMock.createMock(DataSet.class);
DataSet.initialize(dataset);
+ int n = 100; // repeat this test n times
+ int maxcrosspnts = 10;
+ int maxattributes = 100;
for (int nloop = 0; nloop < n; nloop++) {
int nbattributes = rng.nextInt(maxattributes) + 3;
int crosspnts = rng.nextInt(maxcrosspnts) + 1;
@@ -102,8 +102,8 @@
CDCrossover crossover = new CDCrossover(crosspnts);
// the parents have no gene in common
- CDRule parent0 = generate0Rule(nbattributes);
- CDRule parent1 = generate1Rule(nbattributes);
+ CDRule parent0 = generate0Rule();
+ CDRule parent1 = generate1Rule();
// due to the random nature of the crossover their must be at most
// (crosspnts+1) areas in the offsprings.
@@ -127,7 +127,7 @@
}
- String printRule(CDRule rule) {
+ static String printRule(CDRule rule) {
StringBuffer buffer = new StringBuffer();
for (int index = 0; index < rule.getNbConditions(); index++) {
@@ -137,7 +137,7 @@
return buffer.toString();
}
- int countAreas(CDRule rule) {
+ static int countAreas(CDRule rule) {
int nbareas = 1; // we already start in an area
int partind = 0; // index of the start of the current part
@@ -153,7 +153,7 @@
return nbareas;
}
- CDRule generate0Rule(int nbattributes) {
+ static CDRule generate0Rule() {
CDRule rule = new CDRule(1);
for (int index = 0; index < rule.getNbConditions(); index++) {
@@ -165,7 +165,7 @@
return rule;
}
- CDRule generate1Rule(int nbattributes) {
+ static CDRule generate1Rule() {
CDRule rule = new CDRule(1);
for (int index = 0; index < rule.getNbConditions(); index++) {
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java Mon Aug 24 20:16:37 2009
@@ -30,18 +30,16 @@
public void testGet() {
int n = 100;
Random rng = new MersenneTwisterRNG();
- int tp, tn, fp, fn;
- double se, sp;
for (int nloop = 0; nloop < n; nloop++) {
- tp = rng.nextInt(1000);
- tn = rng.nextInt(1000);
- fp = rng.nextInt(1000);
- fn = rng.nextInt(1000);
+ int tp = rng.nextInt(1000);
+ int tn = rng.nextInt(1000);
+ int fp = rng.nextInt(1000);
+ int fn = rng.nextInt(1000);
CDFitness fitness = new CDFitness(tp, fp, tn, fn);
- se = ((double) tp) / (tp + fn);
- sp = ((double) tn) / (tn + fp);
+ double se = ((double) tp) / (tp + fn);
+ double sp = ((double) tn) / (tn + fp);
assertEquals(se * sp, fitness.get());
}