You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/31 18:25:12 UTC

svn commit: r690720 - in /lucene/mahout/trunk: core/ core/lib/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ core/src/ma...

Author: srowen
Date: Sun Aug 31 09:25:09 2008
New Revision: 690720

URL: http://svn.apache.org/viewvc?rev=690720&view=rev
Log:
Move to Java 6, later versions of libraries. Yet more small tweaks to code based on analysis, including making sure to use UTF-8 encoding everywhere rather than platform default. Standardized on MersenneTwisterRNG. Used Class.asSubclass() to avoid casts.

Added:
    lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar   (with props)
    lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar   (with props)
    lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar   (with props)
    lucene/mahout/trunk/core/lib/kfs-0.2.1.jar   (with props)
    lucene/mahout/trunk/core/lib/kfs-LICENSE.txt   (contents, props changed)
      - copied, changed from r688323, lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt
Removed:
    lucene/mahout/trunk/core/lib/commons-httpclient-3.0.1.jar
    lucene/mahout/trunk/core/lib/hadoop-0.17.1-core.jar
    lucene/mahout/trunk/core/lib/jets3t-0.5.0.jar
    lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt
    lucene/mahout/trunk/core/lib/kfs-0.1.jar
Modified:
    lucene/mahout/trunk/core/build.xml
    lucene/mahout/trunk/core/pom.xml
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
    lucene/mahout/trunk/core/taste-build.xml
    lucene/mahout/trunk/examples/build.xml
    lucene/mahout/trunk/examples/pom.xml
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
    lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java

Modified: lucene/mahout/trunk/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/build.xml (original)
+++ lucene/mahout/trunk/core/build.xml Sun Aug 31 09:25:09 2008
@@ -30,7 +30,7 @@
   </tstamp>
 
   <!-- Java Version we are compatible with -->
-  <property name="java.compat.version" value="1.5" />
+  <property name="java.compat.version" value="6" />
 
   <!-- Mahout Implementation Version -->
   <!--
@@ -80,7 +80,7 @@
   <property name="build.javadoc" value="${build.docs}/api"/>
   <property name="javadoc.access" value="protected"/>
   <property name="javadoc.link.java"
-            value="http://java.sun.com/j2se/1.5.0/docs/api/"/>
+            value="http://java.sun.com/javase/6/docs/api/"/>
   <property name="javadoc.link.junit"
             value="http://junit.sourceforge.net/javadoc/"/>
   <property name="javadoc.link.lucene"

Added: lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/mahout/trunk/core/lib/kfs-0.2.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/kfs-0.2.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/mahout/trunk/core/lib/kfs-0.2.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Copied: lucene/mahout/trunk/core/lib/kfs-LICENSE.txt (from r688323, lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/kfs-LICENSE.txt?p2=lucene/mahout/trunk/core/lib/kfs-LICENSE.txt&p1=lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt&r1=688323&r2=690720&rev=690720&view=diff
==============================================================================
    (empty)

Propchange: lucene/mahout/trunk/core/lib/kfs-LICENSE.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/pom.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/pom.xml (original)
+++ lucene/mahout/trunk/core/pom.xml Sun Aug 31 09:25:09 2008
@@ -25,8 +25,8 @@
         <artifactId>maven-compiler-plugin</artifactId>
         <configuration>
           <encoding>UTF-8</encoding>
-          <source>1.5</source>
-          <target>1.5</target>
+          <source>6</source>
+          <target>6</target>
         </configuration>
       </plugin>
 
@@ -218,7 +218,7 @@
     <dependency>
       <groupId>commons-httpclient</groupId>
       <artifactId>commons-httpclient</artifactId>
-      <version>3.0.1</version>
+      <version>3.1</version>
     </dependency>
 
     <dependency>

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java Sun Aug 31 09:25:09 2008
@@ -23,6 +23,9 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
+/**
+ * A {@link WritableComparable} encapsulating two {@link org.apache.mahout.cf.taste.model.Item}s.
+ */
 public final class ItemItemWritable implements WritableComparable {
 
   private String itemAID;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java Sun Aug 31 09:25:09 2008
@@ -24,6 +24,7 @@
 import java.io.IOException;
 
 /**
+ * A {@link Writable} encapsulating a {@link org.apache.mahout.cf.taste.model.Item} and a preference value.
  */
 public final class ItemPrefWritable implements Writable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java Sun Aug 31 09:25:09 2008
@@ -90,8 +90,7 @@
       throw new RuntimeException(ioe);
     }
     try {
-      Class<? extends Recommender> recommenderClass =
-          (Class<? extends Recommender>) Class.forName(recommenderClassName);
+      Class<? extends Recommender> recommenderClass = Class.forName(recommenderClassName).asSubclass(Recommender.class);
       Constructor<? extends Recommender> constructor = recommenderClass.getConstructor(DataModel.class);
       recommender = constructor.newInstance(fileDataModel);
     } catch (NoSuchMethodException nsme) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Sun Aug 31 09:25:09 2008
@@ -25,10 +25,9 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.nio.charset.Charset;
 
 /**
  * Iterates over the lines of a text file. This assumes the text file is UTF-8 encoded
@@ -46,13 +45,7 @@
    */
   public FileLineIterator(File file) throws IOException {
     InputStream is = new FileInputStream(file);
-    Reader fileReader;
-    try {
-      fileReader = new InputStreamReader(is, "UTF8");
-    } catch (UnsupportedEncodingException uee) {
-      throw new AssertionError(uee);
-    }
-    reader = new BufferedReader(fileReader);
+    reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
     nextLine = reader.readLine();
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java Sun Aug 31 09:25:09 2008
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.cf.taste.impl.common;
 
+import org.uncommons.maths.random.MersenneTwisterRNG;
+
 import java.util.Random;
 
 /**
@@ -25,7 +27,12 @@
  */
 public final class RandomUtils {
 
-  private static final long STANDARD_SEED = 0xCAFEBABEL;
+  private static final byte[] STANDARD_SEED = {
+      (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+      (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+      (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+      (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+  };
   private static boolean testSeed;
 
   private RandomUtils() {
@@ -36,7 +43,7 @@
   }
 
   public static Random getRandom() {
-    return testSeed ? new Random(STANDARD_SEED) : new Random();
+    return testSeed ? new MersenneTwisterRNG(STANDARD_SEED) : new MersenneTwisterRNG();
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Sun Aug 31 09:25:09 2008
@@ -20,16 +20,20 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 
 import java.util.Collection;
+import java.util.Random;
 
 /**
  * <p>Contains methods and resources useful to all classes in this package.</p>
  */
 abstract class AbstractUserNeighborhood implements UserNeighborhood {
 
+  private static final Random random = RandomUtils.getRandom();
+
   private final UserCorrelation userCorrelation;
   private final DataModel dataModel;
   private final double samplingRate;
@@ -61,7 +65,7 @@
   }
 
   final boolean sampleForUser() {
-    return samplingRate >= 1.0 || Math.random() < samplingRate;
+    return samplingRate >= 1.0 || random.nextDouble() < samplingRate;
   }
 
   public final void refresh(Collection<Refreshable> alreadyRefreshed) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Sun Aug 31 09:25:09 2008
@@ -21,9 +21,11 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.User;
 
 import java.util.Collection;
+import java.util.Random;
 
 /**
  * <p>Defines cluster similarity as the <em>smallest</em> correlation between any two
@@ -32,6 +34,8 @@
  */
 public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
 
+  private static final Random random = RandomUtils.getRandom();
+
   private final UserCorrelation correlation;
   private final double samplingPercentage;
 
@@ -67,7 +71,7 @@
     }
     double leastCorrelation = Double.POSITIVE_INFINITY;
     for (User user1 : cluster1) {
-      if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+      if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
         for (User user2 : cluster2) {
           double theCorrelation = correlation.userCorrelation(user1, user2);
           if (theCorrelation < leastCorrelation) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Sun Aug 31 09:25:09 2008
@@ -21,9 +21,11 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.User;
 
 import java.util.Collection;
+import java.util.Random;
 
 /**
  * <p>Defines cluster similarity as the <em>largest</em> correlation between any two
@@ -32,6 +34,8 @@
  */
 public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
 
+  private static final Random random = RandomUtils.getRandom();
+
   private final UserCorrelation correlation;
   private final double samplingPercentage;
 
@@ -67,7 +71,7 @@
     }
     double greatestCorrelation = Double.NEGATIVE_INFINITY;
     for (User user1 : cluster1) {
-      if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+      if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
         for (User user2 : cluster2) {
           double theCorrelation = correlation.userCorrelation(user1, user2);
           if (theCorrelation > greatestCorrelation) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Sun Aug 31 09:25:09 2008
@@ -324,37 +324,10 @@
     }
   }
 
-  private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done) throws TasteException {
+  private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done) 
+      throws TasteException {
     // We find a certain number of closest clusters...
-    boolean full = false;
-    LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
-    int i = 0;
-    for (Collection<User> cluster1 : clusters) {
-      i++;
-      ListIterator<Collection<User>> it2 = clusters.listIterator(i);
-      while (it2.hasNext()) {
-        Collection<User> cluster2 = it2.next();
-        double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
-        if (!Double.isNaN(similarity) &&
-            (!full || similarity > queue.getLast().getSimilarity())) {
-          ListIterator<ClusterClusterPair> queueIterator =
-                  queue.listIterator(queue.size());
-          while (queueIterator.hasPrevious()) {
-            if (similarity <= queueIterator.previous().getSimilarity()) {
-              queueIterator.next();
-              break;
-            }
-          }
-          queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
-          if (full) {
-            queue.removeLast();
-          } else if (queue.size() > numUsers) { // use numUsers as queue size limit
-            full = true;
-            queue.removeLast();
-          }
-        }
-      }
-    }
+    LinkedList<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);
 
     // The first one is definitely the closest pair in existence so we can cluster
     // the two together, put it back into the set of clusters, and start again. Instead
@@ -435,6 +408,40 @@
     return done;
   }
 
+  private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<User>> clusters)
+      throws TasteException {
+    boolean full = false;
+    LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
+    int i = 0;
+    for (Collection<User> cluster1 : clusters) {
+      i++;
+      ListIterator<Collection<User>> it2 = clusters.listIterator(i);
+      while (it2.hasNext()) {
+        Collection<User> cluster2 = it2.next();
+        double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
+        if (!Double.isNaN(similarity) &&
+            (!full || similarity > queue.getLast().getSimilarity())) {
+          ListIterator<ClusterClusterPair> queueIterator =
+                  queue.listIterator(queue.size());
+          while (queueIterator.hasPrevious()) {
+            if (similarity <= queueIterator.previous().getSimilarity()) {
+              queueIterator.next();
+              break;
+            }
+          }
+          queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
+          if (full) {
+            queue.removeLast();
+          } else if (queue.size() > numUsers) { // use numUsers as queue size limit
+            full = true;
+            queue.removeLast();
+          }
+        }
+      }
+    }
+    return queue;
+  }
+
   private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)
           throws TasteException {
     Map<Object, List<RecommendedItem>> recsPerUser = new FastMap<Object, List<RecommendedItem>>();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sun Aug 31 09:25:09 2008
@@ -287,8 +287,8 @@
       String label = cmdLine.getOptionValue(labelOpt.getOpt());
       Analyzer analyzer;
       if (cmdLine.hasOption(analyzerOpt.getOpt())) {
-        analyzer = (Analyzer) Class.forName(
-            cmdLine.getOptionValue(analyzerOpt.getOpt())).newInstance();
+        analyzer = Class.forName(
+            cmdLine.getOptionValue(analyzerOpt.getOpt())).asSubclass(Analyzer.class).newInstance();
       } else {
         analyzer = new StandardAnalyzer();
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Sun Aug 31 09:25:09 2008
@@ -44,6 +44,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.nio.charset.Charset;
 
 public class Classify {
 
@@ -85,8 +86,8 @@
 
     log.info("Loading model from: {}", modelPaths);
 
-    Model model = null;
-    Classifier classifier = null;
+    Model model;
+    Classifier classifier;
 
     String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
 
@@ -98,6 +99,8 @@
       log.info("Testing Complementary Bayes Classifier");
       model = new CBayesModel();
       classifier = new CBayesClassifier();
+    } else {
+      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
     }
 
     model = reader.loadModel(model, fs, modelPaths, conf);
@@ -119,8 +122,7 @@
     Analyzer analyzer = null;
     if (cmdLine.hasOption(analyzerOpt.getOpt())) {
       String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
-      Class clazz = Class.forName(className);
-      analyzer = (Analyzer) clazz.newInstance();
+      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
     }
     if (analyzer == null) {
       analyzer = new StandardAnalyzer();
@@ -134,7 +136,7 @@
     }
 
     log.info("Converting input document to proper format");
-    String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), encoding));
+    String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
     StringBuilder line = new StringBuilder();
     for(String token : document)
     {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java Sun Aug 31 09:25:09 2008
@@ -25,6 +25,7 @@
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.Deque;
 
 
 /**
@@ -54,7 +55,7 @@
       }
     }
 
-    LinkedList<ClassifierResult> result = new LinkedList<ClassifierResult>();
+    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
     while ((tmp = (ClassifierResult) pq.pop()) != null) {
       result.addLast(tmp);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java Sun Aug 31 09:25:09 2008
@@ -45,7 +45,7 @@
     float numerator =  result + alpha_i;
     float denominator =(sumLabelWeight + vocabCount);
     
-    float weight = new Double(Math.log(numerator /denominator)).floatValue();
+    float weight = (float) Math.log(numerator /denominator);
     result = (-1.0f * (weight));
 
     return result;
@@ -98,7 +98,7 @@
           float D_ij = getWeightUnprocessed(label, feature);
           float sumLabelWeight = getSumLabelWeight(label);
           // TODO srowen says sigma_j is unused
-          float sigma_j = getSumFeatureWeight(feature);
+          //float sigma_j = getSumFeatureWeight(feature);
 
           float numerator = D_ij + alpha_i;
           float denominator = sumLabelWeight + vocabCount;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java Sun Aug 31 09:25:09 2008
@@ -87,7 +87,7 @@
       dfs.delete(outPath, true);
 
     DefaultStringifier<Integer> intStringifier = new DefaultStringifier<Integer>(conf, Integer.class);
-    String gramSizeString = intStringifier.toString(new Integer(gramSize));
+    String gramSizeString = intStringifier.toString(Integer.valueOf(gramSize));
 
     Integer retGramSize = intStringifier.fromString(gramSizeString);
     log.info("{}", retGramSize);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java Sun Aug 31 09:25:09 2008
@@ -26,14 +26,12 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.classifier.BayesFileFormatter;
-import org.apache.mahout.common.Model;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.List;
 
 /**
  * Reads the input train set(preprocessed using the {@link BayesFileFormatter}).
@@ -66,7 +64,7 @@
   public void map(Text key, Text value,
       OutputCollector<Text, FloatWritable> output, Reporter reporter)
       throws IOException {
-    String line = value.toString();
+    //String line = value.toString();
     String label = key.toString();
     int keyLen = label.length();
 
@@ -77,7 +75,7 @@
     builder.ensureCapacity(32);// make sure we have a reasonably size buffer to
                                // begin with
     // TODO: srowen says this var isn't used right now
-    List<String> previousN_1Grams  = Model.generateNGramsWithoutLabel(line, keyLen);
+    //List<String> previousN_1Grams  = Model.generateNGramsWithoutLabel(line, keyLen);
     
     double lengthNormalisation = 0.0;
     for (double D_kj : wordList.values()) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java Sun Aug 31 09:25:09 2008
@@ -201,7 +201,7 @@
       // the key is either _label_ or label,feature
       while (reader.next(key, value)) {
         String keyStr = key.toString();
-
+        // TODO srowen says we should probably collapse these empty branches?
         if (keyStr.startsWith("_")) {
 
         } else if (keyStr.startsWith(",")) {
@@ -211,8 +211,9 @@
         } else {
           int idx = keyStr.indexOf(",");
           if (idx != -1) {
+            // TODO srowen says data is not used?
             Map<String, Float> data = new HashMap<String, Float>();
-            data.put(keyStr.substring(0, idx), new Float(value.get()));
+            data.put(keyStr.substring(0, idx), value.get());
             writer.append(new Text(key.toString()), value);
           }
         }
@@ -237,7 +238,7 @@
       while (reader.next(key, value)) {
         String keyStr = key.toString();
         if (keyStr.startsWith("_")) { // Sum of weights of labels
-          labelSum.put(keyStr.substring(1), new Float(value.get()));
+          labelSum.put(keyStr.substring(1), value.get());
         }
 
       }
@@ -260,7 +261,7 @@
       while (reader.next(key, value)) {
         String keyStr = key.toString();
         if (keyStr.startsWith("_")) { // Count of Documents in a Label
-          labelDocumentCounts.put(keyStr.substring(1), new Float(value.get()));
+          labelDocumentCounts.put(keyStr.substring(1), value.get());
         }
 
       }
@@ -285,7 +286,7 @@
         if (weightSum.size() > 1) {
           throw new IOException("Incorrect Sum File");
         } else if (keyStr.startsWith("*")) {
-          weightSum.put(keyStr, new Float(value.get()));
+          weightSum.put(keyStr, value.get());
         }
 
       }
@@ -311,7 +312,7 @@
           throw new IOException("Incorrect vocabCount File");
         }
         if (keyStr.startsWith("*")) {
-          weightSum.put(keyStr, new Float(value.get()));
+          weightSum.put(keyStr, value.get());
         }
 
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java Sun Aug 31 09:25:09 2008
@@ -25,6 +25,7 @@
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.Deque;
 
 /**
  * Classifies documents based on a {@link CBayesModel}.  
@@ -52,7 +53,7 @@
       }
     }
 
-    LinkedList<ClassifierResult> result = new LinkedList<ClassifierResult>();
+    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
     while ((tmp = (ClassifierResult) pq.pop()) != null) {
       result.addLast(tmp);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java Sun Aug 31 09:25:09 2008
@@ -42,7 +42,7 @@
     float numerator = sigma_j - result + alpha_i;
     float denominator =(sigma_jSigma_k - sumLabelWeight + vocabCount);
     
-    float weight = new Double(Math.log(numerator /denominator)).floatValue();
+    float weight = (float) Math.log(numerator /denominator);
     result = (-1.0f * (weight / getThetaNormalizer(label)));
     return result;
   }
@@ -81,7 +81,7 @@
     log.info("{}", thetaNormalizer);
     
     /*for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
-      thetaNormalizer.put(label, new Float(0));
+      thetaNormalizer.put(label, 0.0f);
     }
     for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
       for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Sun Aug 31 09:25:09 2008
@@ -250,7 +250,7 @@
     String id = formattedString.substring(0, beginIndex);
     String centroid = formattedString.substring(beginIndex);
     if (id.startsWith("C")) {
-      int canopyId = new Integer(formattedString.substring(1, beginIndex - 2));
+      int canopyId = Integer.parseInt(formattedString.substring(1, beginIndex - 2));
       Vector canopyCentroid = AbstractVector.decodeVector(centroid);
       return new Canopy(canopyCentroid, canopyId);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Sun Aug 31 09:25:09 2008
@@ -39,8 +39,8 @@
     String clusters = args[index++];
     String output = args[index++];
      String measureClass = args[index++];
-    double convergenceDelta = new Double(args[index++]);
-    int maxIterations = new Integer(args[index++]);
+    double convergenceDelta = Double.parseDouble(args[index++]);
+    int maxIterations = Integer.parseInt(args[index++]);
     int numMapTasks = Integer.parseInt(args[index++]);
     boolean doCanopy = Boolean.parseBoolean(args[index++]);
     int m = Integer.parseInt(args[index++]);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Sun Aug 31 09:25:09 2008
@@ -85,7 +85,6 @@
    * Configure the mapper with the cluster info
    * 
    * @param job
-   * @param clusters
    */
   protected void configureWithClusterInfo(JobConf job) {
     // Get the path location where the cluster Info is stored
@@ -116,7 +115,7 @@
         try {
           Text key = new Text();
           Text value = new Text();
-          int counter = 1;
+          //int counter = 1;
           while (reader.next(key, value)) {
             // get the cluster info
             SoftCluster cluster = SoftCluster.decodeCluster(value.toString());

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Sun Aug 31 09:25:09 2008
@@ -50,7 +50,7 @@
   private static int nextClusterId = 0;
 
   // this cluster's clusterId
-  private int clusterId;
+  private final int clusterId;
 
   // the current center
   private Vector center = new SparseVector(0);
@@ -92,10 +92,8 @@
     String id = formattedString.substring(0, beginIndex);
     String center = formattedString.substring(beginIndex);
     if (id.startsWith("C") || id.startsWith("V")) {
-      int clusterId = new Integer(formattedString.substring(1, beginIndex - 2));
-      Vector clusterCenter = null;
-
-      clusterCenter = AbstractVector.decodeVector(center);
+      int clusterId = Integer.parseInt(formattedString.substring(1, beginIndex - 2));
+      Vector clusterCenter = AbstractVector.decodeVector(center);
 
       SoftCluster cluster = new SoftCluster(clusterCenter, clusterId);
       cluster.converged = id.startsWith("V");

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sun Aug 31 09:25:09 2008
@@ -87,8 +87,7 @@
    */
   public static void configure(JobConf job) {
     try {
-      Class<?> cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
-      measure = (DistanceMeasure) cl.newInstance();
+      measure = Class.forName(job.get(DISTANCE_MEASURE_KEY)).asSubclass(DistanceMeasure.class).newInstance();
       measure.configure(job);
     } catch (ClassNotFoundException e) {
       throw new RuntimeException(e);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Sun Aug 31 09:25:09 2008
@@ -30,7 +30,6 @@
 
 /**
  * The Base Model Class. Currently there are some Bayes Model elements which have to be refactored out later.
- * 
  */
 public abstract class Model {
 
@@ -50,9 +49,9 @@
 
   protected final Map<Integer, Float> thetaNormalizer = new HashMap<Integer, Float>();
 
-  protected Float sigma_jSigma_k = new Float(0);
+  protected float sigma_jSigma_k = 0.0f;
 
-  protected final Float alpha_i = 1.0f; // alpha_i can be improved upon for increased smoothing
+  protected final float alpha_i = 1.0f; // alpha_i can be improved upon for increased smoothing
   
   protected abstract float FeatureWeight(Integer label, Integer feature);
   
@@ -119,23 +118,23 @@
     if (featureLabelWeights.size() <= feature) {
       throw new IllegalStateException("This should not happen");
     }
-    featureLabelWeights.get(feature).put(label, new Float(weight));
+    featureLabelWeights.get(feature).put(label, weight);
   }
 
   protected void setSumFeatureWeight(Integer feature, float sum) {
     if (sumFeatureWeight.size() != feature)
       throw new IllegalStateException("This should not happen");
-    sumFeatureWeight.add(feature, new Float(sum));
+    sumFeatureWeight.add(feature, sum);
   }
 
   protected void setSumLabelWeight(Integer label, float sum) {
     if (sumLabelWeight.size() != label)
       throw new IllegalStateException("This should not happen");
-    sumLabelWeight.put(label, new Float(sum));
+    sumLabelWeight.put(label, sum);
   }
 
   protected void setThetaNormalizer(Integer label, float sum) {
-    thetaNormalizer.put(label, new Float(sum));
+    thetaNormalizer.put(label, sum);
   }
 
   public void initializeWeightMatrix() {
@@ -145,7 +144,7 @@
       featureLabelWeights.add(new HashMap<Integer, Float>(1));
   }
 
-  public void setSigma_jSigma_k(Float sigma_jSigma_k) {
+  public void setSigma_jSigma_k(float sigma_jSigma_k) {
     this.sigma_jSigma_k = sigma_jSigma_k;
   }
 
@@ -203,8 +202,7 @@
       
       StringBuilder gramBuilder = new StringBuilder();
      
-      for(String gram: previousN_1Grams)
-      {
+      for (String gram : previousN_1Grams) {
         gramBuilder.append(gram);
         String token = gramBuilder.toString();        
         tokens.add(token);
@@ -232,12 +230,11 @@
       
       StringBuilder gramBuilder = new StringBuilder();
      
-      for(String gram: previousN_1Grams)
-      {
+      for (String gram : previousN_1Grams) {
         gramBuilder.append(gram);
         String token = gramBuilder.toString();        
         tokens.add(token);
-        gramBuilder.append(" ");
+        gramBuilder.append(' ');
       }
     }
     

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Sun Aug 31 09:25:09 2008
@@ -213,7 +213,7 @@
     String vectorClassName = new String(buf);
     Vector vector;
     try {
-      vector = (Vector) Class.forName(vectorClassName).newInstance();
+      vector = Class.forName(vectorClassName).asSubclass(Vector.class).newInstance();
     } catch (ClassNotFoundException e) {
       throw new RuntimeException(e);
     } catch (IllegalAccessException e) {

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java Sun Aug 31 09:25:09 2008
@@ -63,7 +63,12 @@
   private static final int NUM_PREFS = 20;
   private static final int NUM_THREADS = 4;
 
-  private final Random random = RandomUtils.getRandom();
+  private Random random;
+
+  public void setUp() throws Exception {
+    super.setUp();
+    random = RandomUtils.getRandom();
+  }
 
   public void testSlopeOneLoad() throws Exception {
     DataModel model = createModel();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java Sun Aug 31 09:25:09 2008
@@ -17,15 +17,15 @@
 
 package org.apache.mahout.cf.taste.impl.common;
 
-import junit.framework.TestCase;
-
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.TreeSet;
 
-public final class IteratorUtilsTest extends TestCase {
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+
+public final class IteratorUtilsTest extends TasteTestCase {
 
   private static final List<String> TEST_DATA;
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java Sun Aug 31 09:25:09 2008
@@ -17,8 +17,8 @@
 
 package org.apache.mahout.cf.taste.impl.common;
 
-import junit.framework.TestCase;
 import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
 
 import java.util.Collection;
 import java.util.HashSet;
@@ -26,7 +26,7 @@
 /**
  * Tests {@link RefreshHelper}
  */
-public final class RefreshHelperTest extends TestCase {
+public final class RefreshHelperTest extends TasteTestCase {
 
   public void testCallable() {
     MockRefreshable mock = new MockRefreshable();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java Sun Aug 31 09:25:09 2008
@@ -37,7 +37,7 @@
     RecommenderEvaluator evaluator =
             new AverageAbsoluteDifferenceRecommenderEvaluator();
     double eval = evaluator.evaluate(builder, model, 0.75, 1.0);
-    assertEquals(0.26387685767414826, eval, EPSILON);
+    assertEquals(0.12387145540086855, eval, EPSILON);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java Sun Aug 31 09:25:09 2008
@@ -36,7 +36,7 @@
     };
     RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();
     double eval = evaluator.evaluate(builder, model, 0.75, 1.0);
-    assertEquals(0.26387685767414826, eval, EPSILON);
+    assertEquals(0.15553128261991583, eval, EPSILON);
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java Sun Aug 31 09:25:09 2008
@@ -17,9 +17,9 @@
 
 package org.apache.mahout.cf.taste.impl.recommender;
 
-import junit.framework.TestCase;
 import org.apache.mahout.cf.taste.impl.model.GenericItem;
 import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
@@ -30,7 +30,7 @@
 /**
  * <p>Tests {@link NullRescorer}.</p>
  */
-public final class NullRescorerTest extends TestCase {
+public final class NullRescorerTest extends TasteTestCase {
 
   public void testItemRescorer() throws Exception {
     Rescorer<Item> rescorer = NullRescorer.getItemInstance();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java Sun Aug 31 09:25:09 2008
@@ -24,9 +24,11 @@
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.Writer;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
 
 public class BayesFileFormatterTest extends TestCase {
@@ -52,7 +54,7 @@
     words = new String[]{"dog", "cat", "fish", "snake", "zebra"};
     for (String word : words) {
       File file = new File(input, word);
-      FileWriter writer = new FileWriter(file);
+      Writer writer = new OutputStreamWriter(new FileOutputStream(file), Charset.forName("UTF-8"));
       writer.write(word);
       writer.close();
     }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Sun Aug 31 09:25:09 2008
@@ -37,14 +37,17 @@
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.nio.charset.Charset;
 
 public class TestCanopyCreation extends TestCase {
   static final double[][] raw = { { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 },
@@ -144,11 +147,11 @@
 
   private void writePointsToFileWithPayload(List<Vector> points,
       String fileName, String payload) throws IOException {
-    BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+    BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
     for (Vector point : points) {
       output.write(point.asFormatString());
       output.write(payload);
-      output.write("\n");
+      output.write('\n');
     }
     output.flush();
     output.close();
@@ -674,8 +677,8 @@
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
         ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
-    BufferedReader reader = new BufferedReader(new FileReader(
-        "output/clusters/part-00000"));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+        "output/clusters/part-00000"), Charset.forName("UTF-8")));
     int count = 0;
     while (reader.ready()) {
       System.out.println(reader.readLine());
@@ -702,8 +705,8 @@
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
         EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
-    BufferedReader reader = new BufferedReader(new FileReader(
-        "output/clusters/part-00000"));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+        "output/clusters/part-00000"), Charset.forName("UTF-8")));
     int count = 0;
     while (reader.ready()) {
       System.out.println(reader.readLine());
@@ -732,8 +735,8 @@
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
         ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
-    BufferedReader reader = new BufferedReader(new FileReader(
-        "output/clusters/part-00000"));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+        "output/clusters/part-00000"), Charset.forName("UTF-8")));
     int count = 0;
     while (reader.ready()) {
       String line = reader.readLine();
@@ -764,8 +767,8 @@
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output",
         EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
-    BufferedReader reader = new BufferedReader(new FileReader(
-        "output/clusters/part-00000"));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+        "output/clusters/part-00000"), Charset.forName("UTF-8")));
     int count = 0;
     while (reader.ready()) {
       String line = reader.readLine();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Sun Aug 31 09:25:09 2008
@@ -18,12 +18,13 @@
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.nio.charset.Charset;
 
 import junit.framework.TestCase;
 
@@ -63,7 +64,7 @@
 
     // Shift the decimal the correct number of places
     // to the right.
-    val = val * factor;
+    val *= factor;
 
     // Round to the nearest integer.
     long tmp = Math.round(val);
@@ -88,6 +89,7 @@
     DistanceMeasure measure = (DistanceMeasure) cl.newInstance();
     SoftCluster.config(measure, threshold);
     boolean converged = false;
+    // TODO srowen notes that converged is always false?
     for (int iter = 0; !converged && iter < numIter; iter++) {
       iterateReference(points, clusterList, measure);
     }
@@ -158,14 +160,9 @@
       // run reference FuzzyKmeans algorithm
       referenceFuzzyKMeans(points, clusterList, pointClusterInfo,
           EuclideanDistanceMeasure.class.getName(), 0.001, 2);
-      Iterator<Map.Entry<String, String>> iterator = pointClusterInfo
-          .entrySet().iterator();
 
       // iterate for each point
-      while (iterator.hasNext()) {
-        Map.Entry<String, String> entry = iterator.next();
-        String value = entry.getValue();
-
+      for (String value : pointClusterInfo.values()) {
         String clusterInfoStr = value.substring(1, value.length() - 1);
         String[] clusterInfoList = clusterInfoStr.split(" ");
         assertEquals("Number of clusters", k + 1, clusterInfoList.length);
@@ -232,8 +229,8 @@
       assertTrue("output dir exists?", outDir.exists());
       String[] outFiles = outDir.list();
       assertEquals("output dir files?", 4, outFiles.length);
-      BufferedReader reader = new BufferedReader(new FileReader(
-          "output/points/part-00000"));
+      BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+          "output/points/part-00000"), Charset.forName("UTF-8")));
 
       while (reader.ready()) {
         String line = reader.readLine();
@@ -298,7 +295,8 @@
       Map<String, Double> pointTotalProbMap = new HashMap<String, Double>();
 
       for (String key : mapCollector.getKeys()) {
-        SoftCluster cluster = SoftCluster.decodeCluster(key);
+        //SoftCluster cluster = SoftCluster.decodeCluster(key);
+        // TODO(srowen): the decoded cluster appears to be unused here; confirm, then delete the commented-out line above.
         List<Text> values = mapCollector.getValue(key);
 
         for (Text value : values) {
@@ -318,11 +316,8 @@
           pointTotalProbMap.put(encodedVector, probVal + pointProb);
         }
       }
-      Iterator<Map.Entry<String, Double>> iterator = pointTotalProbMap
-          .entrySet().iterator();
 
-      while (iterator.hasNext()) {
-        Map.Entry<String, Double> entry = iterator.next();
+      for (Map.Entry<String, Double> entry : pointTotalProbMap.entrySet()) {
         String key = entry.getKey();
         double value = round(entry.getValue(), 1);
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Sun Aug 31 09:25:09 2008
@@ -37,11 +37,14 @@
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
 import java.util.ArrayList;
 import java.util.List;
+import java.nio.charset.Charset;
 
 public class TestKmeansClustering extends TestCase {
 
@@ -264,13 +267,13 @@
         assertEquals("too many values", 1, values.size());
         String value = values.get(0).toString();
         int ix = value.indexOf(",");
-        count += new Integer(value.substring(0, ix));
+        count += Integer.parseInt(value.substring(0, ix));
         total = total
             .plus(AbstractVector.decodeVector(value.substring(ix + 2)));
       }
       assertEquals("total points", 9, count);
-      assertEquals("point total[0]", 27, (new Double(total.get(0))).intValue());
-      assertEquals("point total[1]", 27, (new Double(total.get(1))).intValue());
+      assertEquals("point total[0]", 27, (int) total.get(0));
+      assertEquals("point total[1]", 27, (int) total.get(1));
     }
   }
 
@@ -399,8 +402,8 @@
       assertTrue("output dir exists?", outDir.exists());
       String[] outFiles = outDir.list();
       assertEquals("output dir files?", 4, outFiles.length);
-      BufferedReader reader = new BufferedReader(new FileReader(
-          "output/points/part-00000"));
+      BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+          "output/points/part-00000"), Charset.forName("UTF-8")));
       int[] expect = expectedNumPoints[k];
       DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
       while (reader.ready()) {
@@ -451,8 +454,8 @@
     assertTrue("output dir exists?", outDir.exists());
     String[] outFiles = outDir.list();
     assertEquals("output dir files?", 4, outFiles.length);
-    BufferedReader reader = new BufferedReader(new FileReader(
-        "output/points/part-00000"));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+        "output/points/part-00000"), Charset.forName("UTF-8")));
     DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
     while (reader.ready()) {
       String line = reader.readLine();
@@ -468,11 +471,11 @@
 
   public static void writePointsToFileWithPayload(List<Vector> points,
       String fileName, String payload) throws IOException {
-    BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+    BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
     for (Vector point : points) {
       output.write(point.asFormatString());
       output.write(payload);
-      output.write("\n");
+      output.write('\n');
     }
     output.flush();
     output.close();

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sun Aug 31 09:25:09 2008
@@ -33,12 +33,14 @@
 
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.nio.charset.Charset;
 
 public class TestMeanShift extends TestCase {
 
@@ -73,11 +75,11 @@
    */
   private void writePointsToFileWithPayload(Vector[] points, String fileName,
       String payload) throws IOException {
-    BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+    BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
     for (Vector point : points) {
       output.write(new MeanShiftCanopy(point).toString());
       output.write(payload);
-      output.write("\n");
+      output.write('\n');
     }
     output.flush();
     output.close();

Modified: lucene/mahout/trunk/core/taste-build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/taste-build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/taste-build.xml (original)
+++ lucene/mahout/trunk/core/taste-build.xml Sun Aug 31 09:25:09 2008
@@ -26,14 +26,15 @@
 
   <property name="axis.jars" location="lib/axis/"/>
   <property name="dbcp.jar" location="lib/commons-dbcp-1.2.2.jar"/>
-  <property name="junit.jar" location="lib/junit-3.8.2.jar"/>
+  <property name="junit.jar" location="../lib/junit-3.8.2.jar"/>
   <property name="pool.jar" location="lib/commons-pool-1.4.jar"/>
   <property name="proguard.jar" location="lib/proguard.jar"/>
   <property name="servlet.jar" location="lib/servlet-api.jar"/>
-  <property name="hadoop.jar" location="lib/hadoop-0.17.1-core.jar"/>
+  <property name="hadoop.jar" location="lib/hadoop-0.18.0-core.jar"/>
   <property name="slf4j-api.jar" location="lib/slf4j-api-1.5.2.jar"/>
   <property name="slf4j.jar" location="lib/slf4j-jcl-1.5.2.jar"/>
   <property name="commons-logging.jar" location="lib/commons-logging-1.1.1.jar"/>
+  <property name="uncommons-math.jar" location="lib/uncommons-maths-1.0.2.jar"/>
 
   <property environment="env"/>
   <property name="name" value="${ant.project.name}"/>
@@ -48,8 +49,8 @@
 
   <target depends="init" name="build" description="Compiles all code">
     <mkdir dir="build"/>
-    <javac source="1.5"
-           target="1.5"
+    <javac source="6"
+           target="6"
            deprecation="true"
            destdir="build"
            debug="true"
@@ -62,6 +63,7 @@
         <pathelement location="${pool.jar}"/>
         <pathelement location="${hadoop.jar}"/>
         <pathelement location="${slf4j-api.jar}"/>
+        <pathelement location="${uncommons-math.jar}"/>
       </classpath>
       <include name="org/apache/mahout/cf/taste/**"/>
     </javac>
@@ -94,7 +96,7 @@
       <arg value="-dontobfuscate"/>
       <arg value="-optimizationpasses 6"/>
       <arg value="-keep public class * { public protected *; }"/>
-      <arg value="-target 5"/>
+      <arg value="-target 6"/>
       <arg value="-verbose"/>
     </java>
     <delete file="temp.jar"/>
@@ -102,8 +104,8 @@
 
   <target depends="build" name="build-test" description="Builds test classes">
     <mkdir dir="build-test"/>
-    <javac source="1.5"
-           target="1.5"
+    <javac source="6"
+           target="6"
            debug="true"
            debuglevel="lines,vars,source"
            deprecation="true"
@@ -157,8 +159,8 @@
         </not>
       </condition>
     </fail>
-    <javac source="1.5"
-           target="1.5"
+    <javac source="6"
+           target="6"
            deprecation="true"
            debug="true"
            optimize="true"
@@ -204,6 +206,7 @@
         <pathelement location="${slf4j-api.jar}"/>
         <pathelement location="${slf4j.jar}"/>
         <pathelement location="${commons-logging.jar}"/>
+        <pathelement location="${uncommons-math.jar}"/>
         <pathelement location="${release-jar}"/>
         <pathelement location="build-test"/>
       </classpath>

Modified: lucene/mahout/trunk/examples/build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/build.xml (original)
+++ lucene/mahout/trunk/examples/build.xml Sun Aug 31 09:25:09 2008
@@ -30,7 +30,7 @@
   </tstamp>
 
   <!-- Java Version we are compatible with -->
-  <property name="java.compat.version" value="1.5" />
+  <property name="java.compat.version" value="6" />
 
   <!-- Mahout Implementation Version -->
   <!--
@@ -78,7 +78,7 @@
   <property name="build.javadoc" value="${build.docs}/api"/>
   <property name="javadoc.access" value="protected"/>
   <property name="javadoc.link.java"
-            value="http://java.sun.com/j2se/1.5.0/docs/api/"/>
+            value="http://java.sun.com/javase/6/docs/api/"/>
   <property name="javadoc.link.junit"
             value="http://junit.sourceforge.net/javadoc/"/>
   <property name="javadoc.link.lucene"

Modified: lucene/mahout/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/pom.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/pom.xml (original)
+++ lucene/mahout/trunk/examples/pom.xml Sun Aug 31 09:25:09 2008
@@ -25,8 +25,8 @@
         <artifactId>maven-compiler-plugin</artifactId>
         <configuration>
           <encoding>UTF-8</encoding>
-          <source>1.5</source>
-          <target>1.5</target>
+          <source>6</source>
+          <target>6</target>
         </configuration>
       </plugin>
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Sun Aug 31 09:25:09 2008
@@ -26,13 +26,14 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintWriter;
+import java.io.OutputStreamWriter;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.nio.charset.Charset;
 
 public final class GroupLensDataModel extends FileDataModel {
 
@@ -75,7 +76,7 @@
     if (!resultFile.exists()) {
       PrintWriter writer = null;
       try {
-        writer = new PrintWriter(new FileWriter(resultFile));
+        writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
         for (String line : new FileLineIterable(originalFile)) {
           String convertedLine;
           if (ratings) {

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Sun Aug 31 09:25:09 2008
@@ -98,8 +98,8 @@
 
     log.info("Loading model from: {}", modelPaths);
 
-    Model model = null;
-    Classifier classifier = null;
+    Model model;
+    Classifier classifier;
 
     String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
 
@@ -111,6 +111,8 @@
       log.info("Testing Complementary Bayes Classifier");
       model = new CBayesModel();
       classifier = new CBayesClassifier();
+    } else {
+      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
     }
 
     model = reader.loadModel(model, fs, modelPaths, conf);

Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java Sun Aug 31 09:25:09 2008
@@ -23,12 +23,11 @@
 import org.apache.hadoop.fs.Path;
 
 import java.io.BufferedReader;
-import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.nio.charset.Charset;
 
-/**
- * 
- */
 public class FileInfosDatasetTest extends TestCase {
 
   public void testRanges() throws IOException {
@@ -39,7 +38,7 @@
     DataSet.initialize(dataset);
 
     String filename = "build/test-classes/wdbc/wdbc.data";
-    BufferedReader in = new BufferedReader(new FileReader(filename));
+    BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), Charset.forName("UTF-8")));
 
     String line;
     DataLine dl = new DataLine();