You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/24 22:16:40 UTC

svn commit: r807361 [1/2] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/main/java/org/apache/mahout/classifier/cbayes/ core/src/main/java/org/a...

Author: srowen
Date: Mon Aug 24 20:16:37 2009
New Revision: 807361

URL: http://svn.apache.org/viewvc?rev=807361&view=rev
Log:
MAHOUT-166

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java Mon Aug 24 20:16:37 2009
@@ -35,7 +35,6 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.Callable;
-import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * <p>A {@link Recommender} which caches the results from another {@link Recommender} in memory. Results are held by
@@ -46,7 +45,7 @@
   private static final Logger log = LoggerFactory.getLogger(CachingRecommender.class);
 
   private final Recommender recommender;
-  private final AtomicInteger maxHowMany;
+  private final int[] maxHowMany;
   private final Cache<Long, Recommendations> recommendationCache;
   private final Cache<LongPair, Float> estimatedPrefCache;
   private final RefreshHelper refreshHelper;
@@ -57,7 +56,7 @@
       throw new IllegalArgumentException("recommender is null");
     }
     this.recommender = recommender;
-    this.maxHowMany = new AtomicInteger(1);
+    this.maxHowMany = new int[] {1};
     // Use "num users" as an upper limit on cache size. Rough guess.
     int numUsers = recommender.getDataModel().getNumUsers();
     this.recommendationCache =
@@ -105,8 +104,8 @@
     }
 
     synchronized (maxHowMany) {
-      if (howMany > maxHowMany.get()) {
-        maxHowMany.set(howMany);
+      if (howMany > maxHowMany[0]) {
+        maxHowMany[0] = howMany;
       }
     }
 
@@ -186,7 +185,7 @@
     @Override
     public Recommendations get(Long key) throws TasteException {
       log.debug("Retrieving new recommendations for user ID '{}'", key);
-      int howMany = maxHowMany.get();
+      int howMany = maxHowMany[0];
       Rescorer<Long> rescorer = getCurrentRescorer();
       List<RecommendedItem> recommendations = rescorer == null ?
           recommender.recommend(key, howMany) :

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesThetaNormalizerMapper.java Mon Aug 24 20:16:37 2009
@@ -56,7 +56,8 @@
     String labelFeaturePair = key.toString();
     double alpha_i = 1.0;
 
-    String label = labelFeaturePair.split(",")[0];
+    int comma = labelFeaturePair.indexOf(',');
+    String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
     double weight = Math.log((value.get() + alpha_i) / (labelWeightSum.get(label) + vocabCount));
     output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight));
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesNormalizedWeightMapper.java Mon Aug 24 20:16:37 2009
@@ -52,7 +52,8 @@
 
     String labelFeaturePair = key.toString();
 
-    String label = labelFeaturePair.split(",")[0];
+    int comma = labelFeaturePair.indexOf(',');
+    String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
     output.collect(key, new DoubleWritable(-Math.log(value.get()) / thetaNormalizer.get(label)));// output -D_ij
 
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java Mon Aug 24 20:16:37 2009
@@ -63,7 +63,8 @@
         output.collect(new Text((stringDoubleEntry.getKey() + ',' + feature).trim()), weight); //output Sigma_j
       }
     } else {
-      String label = labelFeaturePair.split(",")[0];
+      int comma = labelFeaturePair.indexOf(',');
+      String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
       double inverseDenominator = 1.0 / (sigma_jSigma_k - labelWeightSum.get(label) + vocabCount);
       DoubleWritable weight = new DoubleWritable(-value.get() * inverseDenominator);
       output.collect(key, weight);//output -D_ij       

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java Mon Aug 24 20:16:37 2009
@@ -63,7 +63,8 @@
       }
 
     } else {
-      String label = labelFeaturePair.split(",")[0];
+      int comma = labelFeaturePair.indexOf(',');
+      String label = comma < 0 ? labelFeaturePair : labelFeaturePair.substring(0, comma);
 
       double D_ij = value.get();
       double denominator = 0.5 * ((sigma_jSigma_k / vocabCount) + (D_ij * this.labelWeightSum.size()));

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.clustering;
 
 import org.apache.hadoop.io.Writable;
@@ -8,10 +25,6 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-/**
- *
- *
- **/
 public abstract class ClusterBase implements Writable {
   // this cluster's clusterId
   protected int id;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
 import org.apache.mahout.matrix.DenseVector;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
 import org.apache.mahout.matrix.DenseVector;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;
 import org.uncommons.maths.random.GaussianGenerator;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet.models;
+
 import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java Mon Aug 24 20:16:37 2009
@@ -1,7 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
-import org.apache.hadoop.io.Writable;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +15,10 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet.models;
+
+import org.apache.hadoop.io.Writable;
+
 /**
  * A model is a probability distribution over observed data points and allows the probability of any data point to be
  * computed.

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet.models;
+
 /** A model distribution allows us to sample a model from its prior distribution. */
 public interface ModelDistribution<O> {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet.models;
+
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet.models;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet.models;
+
 import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.fuzzykmeans;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.fuzzykmeans;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.kmeans;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.kmeans;
+
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Mon Aug 24 20:16:37 2009
@@ -68,7 +68,7 @@
 
     @Override
     public boolean equals(Object o) {
-      if (!(o instanceof String)) {
+      if (!(o instanceof StringDoublePair)) {
         return false;
       }
       StringDoublePair other = (StringDoublePair) o;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java Mon Aug 24 20:16:37 2009
@@ -292,9 +292,12 @@
 
   @Override
   public int hashCode() {
-    int result = (values != null ? values.hashCode() : 0);
-    result = 31 * result + name.hashCode();
-
+    int result = name.hashCode();
+    if (values != null) {
+      for (double value : values) {
+        result = 31 * result + (int) Double.doubleToLongBits(value);
+      }
+    }
     return result;
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.utils;
+
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.matrix.CardinalityException;
 import org.apache.mahout.matrix.Vector;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/StringUtils.java Mon Aug 24 20:16:37 2009
@@ -19,6 +19,8 @@
 
 import com.thoughtworks.xstream.XStream;
 
+import java.util.regex.Pattern;
+
 /**
  * Offers two methods to convert an object to a string representation and restore the object given its string
  * representation. Should use Hadoop Stringifier whenever available.
@@ -26,6 +28,7 @@
 public final class StringUtils {
 
   private static final XStream xstream = new XStream();
+  private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
 
   private StringUtils() {
     // do nothing
@@ -38,7 +41,7 @@
    * @return the string representation of the object
    */
   public static String toString(Object obj) {
-    return xstream.toXML(obj).replaceAll("\n", "");
+    return NEWLINE_PATTERN.matcher(xstream.toXML(obj)).replaceAll("");
   }
 
   /**

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDirichletClustering.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import junit.framework.TestCase;
 import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution;
 import org.apache.mahout.clustering.dirichlet.models.Model;

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestDistributions.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import junit.framework.TestCase;
 import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.lda;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,15 +15,16 @@
  * limitations under the License.
  */
 
-import java.util.ArrayList;
+package org.apache.mahout.clustering.lda;
+
 import java.util.Iterator;
-import java.util.List;
 import java.util.Random;
 
 import junit.framework.TestCase;
 
 import org.apache.commons.math.distribution.PoissonDistribution;
 import org.apache.commons.math.distribution.PoissonDistributionImpl;
+import org.apache.commons.math.MathException;
 
 import org.apache.mahout.matrix.DenseMatrix;
 import org.apache.mahout.matrix.DenseVector;
@@ -34,14 +33,14 @@
 
 public class TestLDAInference extends TestCase {
 
-  private Random random;
+  private static final int NUM_TOPICS = 20;
 
-  private static int NUM_TOPICS = 20;
+  private Random random;
 
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    random = new Random();
+    random = new Random(0xCAFEBABECAFEBABEL);
   }
 
   /**
@@ -49,17 +48,12 @@
    * @param numWords int number of words in the vocabulary
    * @param numWords E[count] for each word
    */
-  private Vector generateRandomDoc(int numWords, double sparsity) {
+  private Vector generateRandomDoc(int numWords, double sparsity) throws MathException {
     Vector v = new DenseVector(numWords);
-    try {
-      PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
-      for (int i = 0; i < numWords; i++) {
-        // random integer
-        v.setQuick(i, dist.inverseCumulativeProbability(random.nextDouble()) + 1);
-      }
-    } catch (Exception e) {
-      e.printStackTrace();
-      fail("Caught " + e.toString());
+    PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
+    for (int i = 0; i < numWords; i++) {
+      // random integer
+      v.setQuick(i, dist.inverseCumulativeProbability(random.nextDouble()) + 1);
     }
     return v;
   }
@@ -68,13 +62,13 @@
     double topicSmoothing = 50.0 / numTopics; // whatever
     Matrix m = new DenseMatrix(numTopics, numWords);
     double[] logTotals = new double[numTopics];
-    double ll = Double.NEGATIVE_INFINITY;
+    double ll = Double.NEGATIVE_INFINITY; // TODO this is not updated in loop?
 
     for (int k = 0; k < numTopics; ++k) {
       double total = 0.0; // total number of pseudo counts we made
       for (int w = 0; w < numWords; ++w) {
         // A small amount of random noise, minimized by having a floor.
-        double pseudocount = random.nextDouble() + 1E-10;
+        double pseudocount = random.nextDouble() + 1.0E-10;
         total += pseudocount;
         m.setQuick(k, w, Math.log(pseudocount));
       }
@@ -86,7 +80,7 @@
   }
 
 
-  private void runTest(int numWords, double sparsity, int numTests) {
+  private void runTest(int numWords, double sparsity, int numTests) throws MathException {
     LDAState state = generateRandomState(numWords, NUM_TOPICS);
     LDAInference lda = new LDAInference(state);
     for (int t = 0; t < numTests; ++t) {
@@ -103,20 +97,20 @@
           assertTrue(k + " " + w + " logProb " + logProb, logProb <= 0.0); 
         }
       }
-      assertTrue("log likelihood", doc.logLikelihood <= 1E-10);
+      assertTrue("log likelihood", doc.logLikelihood <= 1.0E-10);
     }
   }
 
 
-  public void testLDAEasy() {
-    runTest(10, 1, 5); // 1 word per doc in expectation
+  public void testLDAEasy() throws MathException {
+    runTest(10, 1.0, 5); // 1 word per doc in expectation
   }
 
-  public void testLDASparse() {
+  public void testLDASparse() throws MathException {
     runTest(100, 0.4, 5); // 40 words per doc in expectation
   }
 
-  public void testLDADense() {
-    runTest(100, 3, 5); // 300 words per doc in expectation
+  public void testLDADense() throws MathException {
+    runTest(100, 3.0, 5); // 300 words per doc in expectation
   }
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java Mon Aug 24 20:16:37 2009
@@ -17,10 +17,7 @@
 package org.apache.mahout.clustering.lda;
 
 import java.io.File;
-import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 import java.util.Random;
 
 import junit.framework.TestCase;
@@ -28,20 +25,20 @@
 
 import org.apache.commons.math.distribution.PoissonDistribution;
 import org.apache.commons.math.distribution.PoissonDistributionImpl;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
+import org.apache.commons.math.MathException;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.mahout.matrix.DenseMatrix;
 import org.apache.mahout.matrix.Matrix;
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.utils.DummyOutputCollector;
 
 import static org.easymock.classextension.EasyMock.*;
 
 public class TestMapReduce extends TestCase {
 
+  private static final int NUM_TESTS = 10;
+  private static final int NUM_TOPICS = 10;
 
   private Random random;
 
@@ -50,17 +47,12 @@
    * @param numWords int number of words in the vocabulary
    * @param numWords E[count] for each word
    */
-  private SparseVector generateRandomDoc(int numWords, double sparsity) {
+  private SparseVector generateRandomDoc(int numWords, double sparsity) throws MathException {
     SparseVector v = new SparseVector(numWords,(int)(numWords * sparsity));
-    try {
-      PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
-      for (int i = 0; i < numWords; i++) {
-        // random integer
-        v.set(i,dist.inverseCumulativeProbability(random.nextDouble()) + 1);
-      }
-    } catch(Exception e) {
-      e.printStackTrace();
-      fail("Caught " + e.toString());
+    PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
+    for (int i = 0; i < numWords; i++) {
+      // random integer
+      v.set(i,dist.inverseCumulativeProbability(random.nextDouble()) + 1);
     }
     return v;
   }
@@ -69,12 +61,12 @@
     double topicSmoothing = 50.0 / numTopics; // whatever
     Matrix m = new DenseMatrix(numTopics,numWords);
     double[] logTotals = new double[numTopics];
-    double ll = Double.NEGATIVE_INFINITY;
+    double ll = Double.NEGATIVE_INFINITY; // TODO this is not updated in loop?
     for(int k = 0; k < numTopics; ++k) {
       double total = 0.0; // total number of pseudo counts we made
       for(int w = 0; w < numWords; ++w) {
         // A small amount of random noise, minimized by having a floor.
-        double pseudocount = random.nextDouble() + 1E-10;
+        double pseudocount = random.nextDouble() + 1.0E-10;
         total += pseudocount;
         m.setQuick(k,w,Math.log(pseudocount));
       }
@@ -88,14 +80,11 @@
   @Override
   protected void setUp() throws Exception {
     super.setUp();
+    random = new Random(0xCAFEBABECAFEBABEL);
     File f = new File("input");
-    random = new Random();
     f.mkdir();
   }
 
-  private static int NUM_TESTS = 10;
-  private static int NUM_TOPICS = 10;
-
   /**
    * Test the basic Mapper
    * 
@@ -120,7 +109,7 @@
     }
   }
 
-  private int numNonZero(Vector v) {
+  private static int numNonZero(Vector v) {
     int count = 0;
     for(Iterator<Vector.Element> iter = v.iterateNonZero();
         iter.hasNext();iter.next() ) {

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java Mon Aug 24 20:16:37 2009
@@ -30,7 +30,7 @@
  */
 public class DummyEvaluator implements FitnessEvaluator<DummyCandidate> {
 
-  private final Random rng = new Random();
+  private final Random rng = new Random(0xCAFEBABECAFEBABEL);
 
   private static final Map<Integer, Double> evaluations = new HashMap<Integer, Double>();
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java Mon Aug 24 20:16:37 2009
@@ -250,7 +250,7 @@
   }
 
   /*public void testSparseVectorTimesX() {
-    Random rnd = new Random(0xDEADBEEFL);
+    Random rnd = new Random(0xCAFEBABECAFEBABEL);
     Vector v1 = randomSparseVector(rnd);
     double x = rnd.nextDouble();
     long t0 = System.currentTimeMillis();
@@ -274,7 +274,7 @@
   }*/
 
   /*public void testSparseVectorTimesV() {
-    Random rnd = new Random(0xDEADBEEFL);
+    Random rnd = new Random(0xCAFEBABECAFEBABEL);
     Vector v1 = randomSparseVector(rnd);
     Vector v2 = randomSparseVector(rnd);
     long t0 = System.currentTimeMillis();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -11,14 +28,9 @@
 
 import java.io.Reader;
 
-
-/**
- *
- *
- **/
 public class WikipediaAnalyzer extends Analyzer {
 
-  private CharArraySet stopSet;
+  private final CharArraySet stopSet;
 
   public WikipediaAnalyzer() {
     stopSet = (CharArraySet) StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
@@ -34,8 +46,6 @@
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(true, result, stopSet);
-
-
     return result;
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java Mon Aug 24 20:16:37 2009
@@ -29,12 +29,14 @@
 import java.io.PrintWriter;
 import java.io.FileNotFoundException;
 import java.nio.charset.Charset;
+import java.util.regex.Pattern;
 
 /**
  * See <a href="http://www.informatik.uni-freiburg.de/~cziegler/BX/BX-CSV-Dump.zip">download</a> for
  * data needed by this class. The BX-Book-Ratings.csv file is needed.
  */
 public final class BookCrossingDataModel extends FileDataModel {
+  private static final Pattern NON_DIGIT_SEMICOLON_PATTERN = Pattern.compile("[^0-9;]");
 
   public BookCrossingDataModel() throws IOException {
     this(GroupLensDataModel.readResourceToTempFile(
@@ -60,7 +62,7 @@
       writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
       for (String line : new FileLineIterable(originalFile, true)) {
         // Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
-        String convertedLine = line.replaceAll("[^0-9;]", "").replace(';', ',');
+        String convertedLine = NON_DIGIT_SEMICOLON_PATTERN.matcher(line).replaceAll("").replace(';', ',');
         // If this means we deleted an entire ID -- few cases like that -- skip the line
         if (convertedLine.contains(",,")) {
           continue;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Mon Aug 24 20:16:37 2009
@@ -79,7 +79,8 @@
       OutputStream os = new FileOutputStream(tempFile);
       try {
         int bytesRead;
-        for (byte[] buffer = new byte[32768]; (bytesRead = is.read(buffer)) > 0;) {
+        byte[] buffer = new byte[32768];
+        while ((bytesRead = is.read(buffer)) > 0) {
           os.write(buffer, 0, bytesRead);
         }
         os.flush();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 24 20:16:37 2009
@@ -51,7 +51,7 @@
  * Create and run the Wikipedia Dataset Creator.
  */
 public class WikipediaDatasetCreatorDriver {
-  private transient static Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorDriver.class);
+  private static final Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorDriver.class);
 
   private WikipediaDatasetCreatorDriver() {
   }
@@ -96,9 +96,8 @@
 
     Parser parser = new Parser();
     parser.setGroup(group);
-    CommandLine cmdLine = null;
     try {
-      cmdLine = parser.parse(args);
+      CommandLine cmdLine = parser.parse(args);
       if (cmdLine.hasOption(helpOpt)) {
         CommandLineUtil.printHelp(group);
         return;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Mon Aug 24 20:16:37 2009
@@ -30,32 +30,31 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
 import org.apache.mahout.analysis.WikipediaAnalyzer;
 import org.slf4j.LoggerFactory;
 import org.slf4j.Logger;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.ArrayList;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 public class WikipediaDatasetCreatorMapper extends MapReduceBase implements
     Mapper<LongWritable, Text, Text, Text> {
-  private transient static Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorMapper.class);
+  private static final Logger log = LoggerFactory.getLogger(WikipediaDatasetCreatorMapper.class);
 
   private static Set<String> inputCategories = null;
   private static boolean exactMatchOnly = false;
   private static Analyzer analyzer;
+  private static final Pattern SPACE_NON_ALPHA_PATTERN = Pattern.compile("[\\s\\W]");
+
   @Override
   public void map(LongWritable key, Text value,
       OutputCollector<Text, Text> output, Reporter reporter)
       throws IOException {
 
-            StringBuilder contents = new StringBuilder();
+    StringBuilder contents = new StringBuilder();
     String document = value.toString();
     String catMatch = findMatchingCategory(document);
     
@@ -66,14 +65,14 @@
       while((token = stream.next(token)) != null){
         contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
       }
-      output.collect(new Text(catMatch.replaceAll("[\\s\\W]","_")), new Text(contents.toString()));
+      output.collect(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")), new Text(contents.toString()));
     }
   }
 
   public static String findMatchingCategory(String document){
     int startIndex = 0;
     int categoryIndex;
-    String match = null;
+    String match = null; // TODO this is never updated?
     while((categoryIndex = document.indexOf("[[Category:", startIndex))!=-1)
     {
       categoryIndex+=11;
@@ -129,6 +128,7 @@
     } catch (InstantiationException e) {
       throw new RuntimeException(e);
     }
-    log.info("Configure: Input Categories size: " + inputCategories.size() + " Exact Match: " + exactMatchOnly + " Analyzer: " + analyzer.getClass().getName());
+    log.info("Configure: Input Categories size: " + inputCategories.size() + " Exact Match: " + exactMatchOnly +
+             " Analyzer: " + analyzer.getClass().getName());
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.canopy;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.canopy;
+
 import java.awt.Graphics;
 import java.awt.Graphics2D;
 import java.util.ArrayList;
@@ -37,19 +37,18 @@
     this.setTitle("Canopy Clusters (> 5% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
   private static List<Canopy> canopies;
 
   private static final double t1 = 3.0;
 
   private static final double t2 = 1.5;
 
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
     Vector dv = new DenseVector(2);
-    for (Canopy canopy : canopies)
+    for (Canopy canopy : canopies) {
       if (canopy.getNumPoints() > sampleData.size() * 0.05) {
         dv.assign(t1);
         g2.setColor(colors[0]);
@@ -57,6 +56,7 @@
         dv.assign(t2);
         plotEllipse(g2, canopy.getCenter(), dv);
       }
+    }
   }
 
   /**

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -35,8 +35,7 @@
             + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -28,15 +28,14 @@
 import org.apache.mahout.matrix.Vector;
 
 class DisplayASNDirichlet extends DisplayDirichlet {
-  public DisplayASNDirichlet() {
+  DisplayASNDirichlet() {
     initialize();
     this
         .setTitle("Dirichlet Process Clusters - Asymmetric Sampled Normal Distribution (>"
             + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -43,8 +43,7 @@
         + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
@@ -109,14 +108,10 @@
     }
   }
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws IOException {
     UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
-    try {
-      getSamples();
-      getResults();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
+    getSamples();
+    getResults();
     new DisplayASNOutputState();
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.clustering.dirichlet;
 
 import java.awt.Color;
@@ -12,6 +29,7 @@
 import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
 import java.util.List;
+import java.io.IOException;
 
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
@@ -20,7 +38,6 @@
 import org.apache.mahout.matrix.Vector;
 
 public class DisplayDirichlet extends Frame {
-  private static final long serialVersionUID = 1L;
 
   protected int res; //screen resolution
 
@@ -78,7 +95,7 @@
     });
   }
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws IOException {
     UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
     generateSamples();
     new DisplayDirichlet();
@@ -126,10 +143,10 @@
    * @param dv a Vector of rectangle sizes
    */
   public void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
-    int h = size / 2;
     double[] flip = { 1, -1 };
     Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
     v2 = v2.minus(dv.divide(2));
+    int h = size / 2;
     double x = v2.get(0) + h;
     double y = v2.get(1) + h;
     g2.draw(new Rectangle2D.Double(x * ds, y * ds, dv.get(0) * ds, dv.get(1)
@@ -143,10 +160,10 @@
    * @param dv a Vector of rectangle sizes
    */
   public void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
-    int h = size / 2;
     double[] flip = { 1, -1 };
     Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
     v2 = v2.minus(dv.divide(2));
+    int h = size / 2;
     double x = v2.get(0) + h;
     double y = v2.get(1) + h;
     g2

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -34,8 +34,7 @@
         + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -43,8 +43,7 @@
         + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
@@ -107,14 +106,10 @@
     }
   }
 
-  public static void main(String[] args) {
+  public static void main(String[] args) throws IOException {
     UncommonDistributions.init("Mahout=Hadoop+ML".getBytes());
-    try {
-      getSamples();
-      getResults();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
+    getSamples();
+    getResults();
     new DisplayOutputState();
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.dirichlet;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -34,8 +34,7 @@
         + (int) (significance * 100) + "% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.fuzzykmeans;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.fuzzykmeans;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -39,10 +39,6 @@
     this.setTitle("Fuzzy K-Means Clusters (> 5% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
-  private static List<Canopy> canopies;
-
   private static List<List<SoftCluster>> clusters;
 
   private static final double t1 = 3.0;
@@ -58,11 +54,12 @@
     for (List<SoftCluster> cls : clusters) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (SoftCluster cluster : cls)
-        if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
+      for (SoftCluster cluster : cls) {
+        //if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
           dv.assign(cluster.std() * 3);
           plotEllipse(g2, cluster.getCenter(), dv);
-        }
+        //}
+      }
     }
   }
 
@@ -171,7 +168,7 @@
     generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     points.addAll(sampleData);
-    canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
+    List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
     DistanceMeasure measure = new ManhattanDistanceMeasure();
     Cluster.config(measure, 0.001);
     clusters = new ArrayList<List<SoftCluster>>();
@@ -179,11 +176,7 @@
     for (Canopy canopy : canopies)
       if (canopy.getNumPoints() > 0.05 * sampleData.size())
         clusters.get(0).add(new SoftCluster(canopy.getCenter()));
-    try {
-      referenceFuzzyKMeans(sampleData, measure, 0.001, 10);
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
+    referenceFuzzyKMeans(sampleData, measure, 0.001, 10);
     new DisplayFuzzyKMeans();
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.kmeans;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.kmeans;
+
 import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -33,21 +33,18 @@
 import org.apache.mahout.utils.ManhattanDistanceMeasure;
 
 class DisplayKMeans extends DisplayDirichlet {
-  public DisplayKMeans() {
+  DisplayKMeans() {
     initialize();
     this.setTitle("K-Means Clusters (> 5% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
-  private static List<Canopy> canopies;
-
   private static List<List<Cluster>> clusters;
 
   private static final double t1 = 3.0;
 
   private static final double t2 = 1.5;
 
+  @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
@@ -56,11 +53,12 @@
     for (List<Cluster> cls : clusters) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
       g2.setColor(colors[Math.min(colors.length - 1, i--)]);
-      for (Cluster cluster : cls)
-        if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
+      for (Cluster cluster : cls) {
+        //if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
           dv.assign(cluster.getStd() * 3);
           plotEllipse(g2, cluster.getCenter(), dv);
-        }
+        //}
+      }
     }
   }
 
@@ -99,8 +97,7 @@
    */
   private static boolean iterateReference(List<Vector> points,
       List<Cluster> clusters, DistanceMeasure measure) {
-    boolean converged;
-    converged = true;
+    boolean converged = true;
     // iterate through all points, assigning each to the nearest cluster
     for (Vector point : points) {
       Cluster closestCluster = null;
@@ -178,7 +175,7 @@
     generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     points.addAll(sampleData);
-    canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
+    List<Canopy> canopies = populateCanopies(new ManhattanDistanceMeasure(), points, t1, t2);
     DistanceMeasure measure = new ManhattanDistanceMeasure();
     Cluster.config(measure, 0.001);
     clusters = new ArrayList<List<Cluster>>();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.meanshift;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.meanshift;
+
 import java.awt.Color;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
@@ -37,8 +37,6 @@
     this.setTitle("Canopy Clusters (> 1.5% of population)");
   }
 
-  private static final long serialVersionUID = 1L;
-
   private static List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
 
   private static final List<List<Vector>> iterationCenters = new ArrayList<List<Vector>>();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/Constants.java Mon Aug 24 20:16:37 2009
@@ -19,15 +19,15 @@
 /**
  * Constants shared between examples.
  */
-public final class Constants {
+public interface Constants {
 
     /**
      * Directory containing output for examples.
      */
-    public static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
+    String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clustered-points";
     /**
      * Directory used to store the input after it has been processed from its
      * original form into one suitable for processing by the clustering examples.
      */
-    public static final String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
+    String DIRECTORY_CONTAINING_CONVERTED_INPUT = "/data";
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java Mon Aug 24 20:16:37 2009
@@ -24,33 +24,35 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.matrix.DenseVector;
 import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 
 public class InputMapper extends MapReduceBase implements
     Mapper<LongWritable, Text, Text, Vector> {
+
+  private static final Pattern SPACE = java.util.regex.Pattern.compile(" ");
+
   protected Class<? extends Vector> outputClass;
-  protected Constructor constructor;
+  protected Constructor<?> constructor;
 
   @Override
   public void map(LongWritable key, Text values,
       OutputCollector<Text, Vector> output, Reporter reporter) throws IOException {
-    String[] numbers = values.toString().split(" ");
+    String[] numbers = SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     List<Double> doubles = new ArrayList<Double>();
     for (String value : numbers) {
       if (value.length() > 0)
         doubles.add(Double.valueOf(value));
     }
-    Vector result = null;//new DenseVector(doubles.size());
     try {
-      result = (Vector) constructor.newInstance(doubles.size());
+      Vector result = (Vector) constructor.newInstance(doubles.size());//new DenseVector(doubles.size());
       int index = 0;
       for (Double d : doubles)
         result.set(index++, d);

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/NormalScModelDistribution.java Mon Aug 24 20:16:37 2009
@@ -1,5 +1,3 @@
-package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -17,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
+
 import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java Mon Aug 24 20:16:37 2009
@@ -30,14 +30,17 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 
 public class InputMapper extends MapReduceBase implements
     Mapper<LongWritable, Text, Text, MeanShiftCanopy> {
 
+  private static final Pattern SPACE = Pattern.compile(" ");
+
   @Override
   public void map(LongWritable key, Text values,
       OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter) throws IOException {
-    String[] numbers = values.toString().split(" ");
+    String[] numbers = SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
     List<Double> doubles = new ArrayList<Double>();
     for (String value : numbers) {

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Mon Aug 24 20:16:37 2009
@@ -124,13 +124,13 @@
   public static class RndLineRecordReader implements
       RecordReader<LongWritable, Text> {
 
-    private RecordReader<LongWritable, Text> reader;
+    private final RecordReader<LongWritable, Text> reader;
 
-    private Random rng;
+    private final Random rng;
 
-    private double threshold;
+    private final double threshold;
 
-    private boolean training;
+    private final boolean training;
 
     private final LongWritable k = new LongWritable();
 

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDCrossoverTest.java Mon Aug 24 20:16:37 2009
@@ -31,15 +31,15 @@
    * offsprings will not have any common gene.
    */
   public void testMate1() {
-    int maxattributes = 100;
-    int maxcrosspnts = 10;
-    int n = 100; // repeat this test n times
     Random rng = new MersenneTwisterRNG();
 
     // Initialize dataset
     DataSet dataset = EasyMock.createMock(DataSet.class);
     DataSet.initialize(dataset);
 
+    int n = 100; // repeat this test n times
+    int maxcrosspnts = 10;
+    int maxattributes = 100;
     for (int nloop = 0; nloop < n; nloop++) {
       // we need at least 2 attributes for the crossover
       // and a label that will be skipped by the rules
@@ -54,8 +54,8 @@
       CDCrossover crossover = new CDCrossover(crosspnts);
 
       // the parents have no gene in common
-      CDRule parent0 = generate0Rule(nbattributes);
-      CDRule parent1 = generate1Rule(nbattributes);
+      CDRule parent0 = generate0Rule();
+      CDRule parent1 = generate1Rule();
 
       List<CDRule> offsprings = crossover
           .mate(parent0, parent1, crosspnts, rng);
@@ -78,15 +78,15 @@
    * areas.
    */
   public void testMate2() {
-    int maxattributes = 100;
-    int maxcrosspnts = 10;
-    int n = 100; // repeat this test n times
     Random rng = new MersenneTwisterRNG();
 
     // Initialize dataset
     DataSet dataset = EasyMock.createMock(DataSet.class);
     DataSet.initialize(dataset);
 
+    int n = 100; // repeat this test n times
+    int maxcrosspnts = 10;
+    int maxattributes = 100;
     for (int nloop = 0; nloop < n; nloop++) {
       int nbattributes = rng.nextInt(maxattributes) + 3;
       int crosspnts = rng.nextInt(maxcrosspnts) + 1;
@@ -102,8 +102,8 @@
       CDCrossover crossover = new CDCrossover(crosspnts);
 
       // the parents have no gene in common
-      CDRule parent0 = generate0Rule(nbattributes);
-      CDRule parent1 = generate1Rule(nbattributes);
+      CDRule parent0 = generate0Rule();
+      CDRule parent1 = generate1Rule();
 
       // due to the random nature of the crossover there must be at most
       // (crosspnts+1) areas in the offsprings.
@@ -127,7 +127,7 @@
 
   }
 
-  String printRule(CDRule rule) {
+  static String printRule(CDRule rule) {
     StringBuffer buffer = new StringBuffer();
 
     for (int index = 0; index < rule.getNbConditions(); index++) {
@@ -137,7 +137,7 @@
     return buffer.toString();
   }
 
-  int countAreas(CDRule rule) {
+  static int countAreas(CDRule rule) {
 
     int nbareas = 1; // we already start in an area
     int partind = 0; // index of the start of the current part
@@ -153,7 +153,7 @@
     return nbareas;
   }
 
-  CDRule generate0Rule(int nbattributes) {
+  static CDRule generate0Rule() {
     CDRule rule = new CDRule(1);
 
     for (int index = 0; index < rule.getNbConditions(); index++) {
@@ -165,7 +165,7 @@
     return rule;
   }
 
-  CDRule generate1Rule(int nbattributes) {
+  static CDRule generate1Rule() {
     CDRule rule = new CDRule(1);
 
     for (int index = 0; index < rule.getNbConditions(); index++) {

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessTest.java Mon Aug 24 20:16:37 2009
@@ -30,18 +30,16 @@
   public void testGet() {
     int n = 100;
     Random rng = new MersenneTwisterRNG();
-    int tp, tn, fp, fn;
-    double se, sp;
 
     for (int nloop = 0; nloop < n; nloop++) {
-      tp = rng.nextInt(1000);
-      tn = rng.nextInt(1000);
-      fp = rng.nextInt(1000);
-      fn = rng.nextInt(1000);
+      int tp = rng.nextInt(1000);
+      int tn = rng.nextInt(1000);
+      int fp = rng.nextInt(1000);
+      int fn = rng.nextInt(1000);
 
       CDFitness fitness = new CDFitness(tp, fp, tn, fn);
-      se = ((double) tp) / (tp + fn);
-      sp = ((double) tn) / (tn + fp);
+      double se = ((double) tp) / (tp + fn);
+      double sp = ((double) tn) / (tn + fp);
 
       assertEquals(se * sp, fitness.get());
     }