You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/31 18:25:12 UTC
svn commit: r690720 - in /lucene/mahout/trunk: core/ core/lib/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/impl/common/
core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/
core/src/ma...
Author: srowen
Date: Sun Aug 31 09:25:09 2008
New Revision: 690720
URL: http://svn.apache.org/viewvc?rev=690720&view=rev
Log:
Move to Java 6 and to later versions of libraries. Yet more small tweaks to code based on analysis, including making sure to use UTF-8 encoding everywhere rather than the platform default. Standardized on MersenneTwisterRNG. Used Class.asSubclass() to avoid casts.
Added:
lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar (with props)
lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar (with props)
lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar (with props)
lucene/mahout/trunk/core/lib/kfs-0.2.1.jar (with props)
lucene/mahout/trunk/core/lib/kfs-LICENSE.txt (contents, props changed)
- copied, changed from r688323, lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt
Removed:
lucene/mahout/trunk/core/lib/commons-httpclient-3.0.1.jar
lucene/mahout/trunk/core/lib/hadoop-0.17.1-core.jar
lucene/mahout/trunk/core/lib/jets3t-0.5.0.jar
lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt
lucene/mahout/trunk/core/lib/kfs-0.1.jar
Modified:
lucene/mahout/trunk/core/build.xml
lucene/mahout/trunk/core/pom.xml
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
lucene/mahout/trunk/core/taste-build.xml
lucene/mahout/trunk/examples/build.xml
lucene/mahout/trunk/examples/pom.xml
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java
Modified: lucene/mahout/trunk/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/build.xml (original)
+++ lucene/mahout/trunk/core/build.xml Sun Aug 31 09:25:09 2008
@@ -30,7 +30,7 @@
</tstamp>
<!-- Java Version we are compatible with -->
- <property name="java.compat.version" value="1.5" />
+ <property name="java.compat.version" value="6" />
<!-- Mahout Implementation Version -->
<!--
@@ -80,7 +80,7 @@
<property name="build.javadoc" value="${build.docs}/api"/>
<property name="javadoc.access" value="protected"/>
<property name="javadoc.link.java"
- value="http://java.sun.com/j2se/1.5.0/docs/api/"/>
+ value="http://java.sun.com/javase/6/docs/api/"/>
<property name="javadoc.link.junit"
value="http://junit.sourceforge.net/javadoc/"/>
<property name="javadoc.link.lucene"
Added: lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/mahout/trunk/core/lib/commons-httpclient-3.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/mahout/trunk/core/lib/hadoop-0.18.0-core.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/mahout/trunk/core/lib/jets3t-0.6.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/mahout/trunk/core/lib/kfs-0.2.1.jar
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/kfs-0.2.1.jar?rev=690720&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/mahout/trunk/core/lib/kfs-0.2.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Copied: lucene/mahout/trunk/core/lib/kfs-LICENSE.txt (from r688323, lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/lib/kfs-LICENSE.txt?p2=lucene/mahout/trunk/core/lib/kfs-LICENSE.txt&p1=lucene/mahout/trunk/core/lib/kfs-0.1.LICENSE.txt&r1=688323&r2=690720&rev=690720&view=diff
==============================================================================
(empty)
Propchange: lucene/mahout/trunk/core/lib/kfs-LICENSE.txt
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/pom.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/pom.xml (original)
+++ lucene/mahout/trunk/core/pom.xml Sun Aug 31 09:25:09 2008
@@ -25,8 +25,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<encoding>UTF-8</encoding>
- <source>1.5</source>
- <target>1.5</target>
+ <source>6</source>
+ <target>6</target>
</configuration>
</plugin>
@@ -218,7 +218,7 @@
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
- <version>3.0.1</version>
+ <version>3.1</version>
</dependency>
<dependency>
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemItemWritable.java Sun Aug 31 09:25:09 2008
@@ -23,6 +23,9 @@
import java.io.DataOutput;
import java.io.IOException;
+/**
+ * A {@link WritableComparable} encapsulating two {@link org.apache.mahout.cf.taste.model.Item}s.
+ */
public final class ItemItemWritable implements WritableComparable {
private String itemAID;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java Sun Aug 31 09:25:09 2008
@@ -24,6 +24,7 @@
import java.io.IOException;
/**
+ * A {@link Writable} encapsulating a {@link org.apache.mahout.cf.taste.model.Item} and a preference value.
*/
public final class ItemPrefWritable implements Writable {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java Sun Aug 31 09:25:09 2008
@@ -90,8 +90,7 @@
throw new RuntimeException(ioe);
}
try {
- Class<? extends Recommender> recommenderClass =
- (Class<? extends Recommender>) Class.forName(recommenderClassName);
+ Class<? extends Recommender> recommenderClass = Class.forName(recommenderClassName).asSubclass(Recommender.class);
Constructor<? extends Recommender> constructor = recommenderClass.getConstructor(DataModel.class);
recommender = constructor.newInstance(fileDataModel);
} catch (NoSuchMethodException nsme) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Sun Aug 31 09:25:09 2008
@@ -25,10 +25,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.NoSuchElementException;
+import java.nio.charset.Charset;
/**
* Iterates over the lines of a text file. This assumes the text file is UTF-8 encoded
@@ -46,13 +45,7 @@
*/
public FileLineIterator(File file) throws IOException {
InputStream is = new FileInputStream(file);
- Reader fileReader;
- try {
- fileReader = new InputStreamReader(is, "UTF8");
- } catch (UnsupportedEncodingException uee) {
- throw new AssertionError(uee);
- }
- reader = new BufferedReader(fileReader);
+ reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
nextLine = reader.readLine();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java Sun Aug 31 09:25:09 2008
@@ -17,6 +17,8 @@
package org.apache.mahout.cf.taste.impl.common;
+import org.uncommons.maths.random.MersenneTwisterRNG;
+
import java.util.Random;
/**
@@ -25,7 +27,12 @@
*/
public final class RandomUtils {
- private static final long STANDARD_SEED = 0xCAFEBABEL;
+ private static final byte[] STANDARD_SEED = {
+ (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+ (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+ (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+ (byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
+ };
private static boolean testSeed;
private RandomUtils() {
@@ -36,7 +43,7 @@
}
public static Random getRandom() {
- return testSeed ? new Random(STANDARD_SEED) : new Random();
+ return testSeed ? new MersenneTwisterRNG(STANDARD_SEED) : new MersenneTwisterRNG();
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Sun Aug 31 09:25:09 2008
@@ -20,16 +20,20 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import java.util.Collection;
+import java.util.Random;
/**
* <p>Contains methods and resources useful to all classes in this package.</p>
*/
abstract class AbstractUserNeighborhood implements UserNeighborhood {
+ private static final Random random = RandomUtils.getRandom();
+
private final UserCorrelation userCorrelation;
private final DataModel dataModel;
private final double samplingRate;
@@ -61,7 +65,7 @@
}
final boolean sampleForUser() {
- return samplingRate >= 1.0 || Math.random() < samplingRate;
+ return samplingRate >= 1.0 || random.nextDouble() < samplingRate;
}
public final void refresh(Collection<Refreshable> alreadyRefreshed) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Sun Aug 31 09:25:09 2008
@@ -21,9 +21,11 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.model.User;
import java.util.Collection;
+import java.util.Random;
/**
* <p>Defines cluster similarity as the <em>smallest</em> correlation between any two
@@ -32,6 +34,8 @@
*/
public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
+ private static final Random random = RandomUtils.getRandom();
+
private final UserCorrelation correlation;
private final double samplingPercentage;
@@ -67,7 +71,7 @@
}
double leastCorrelation = Double.POSITIVE_INFINITY;
for (User user1 : cluster1) {
- if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+ if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
for (User user2 : cluster2) {
double theCorrelation = correlation.userCorrelation(user1, user2);
if (theCorrelation < leastCorrelation) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Sun Aug 31 09:25:09 2008
@@ -21,9 +21,11 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.model.User;
import java.util.Collection;
+import java.util.Random;
/**
* <p>Defines cluster similarity as the <em>largest</em> correlation between any two
@@ -32,6 +34,8 @@
*/
public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
+ private static final Random random = RandomUtils.getRandom();
+
private final UserCorrelation correlation;
private final double samplingPercentage;
@@ -67,7 +71,7 @@
}
double greatestCorrelation = Double.NEGATIVE_INFINITY;
for (User user1 : cluster1) {
- if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+ if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
for (User user2 : cluster2) {
double theCorrelation = correlation.userCorrelation(user1, user2);
if (theCorrelation > greatestCorrelation) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Sun Aug 31 09:25:09 2008
@@ -324,37 +324,10 @@
}
}
- private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done) throws TasteException {
+ private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done)
+ throws TasteException {
// We find a certain number of closest clusters...
- boolean full = false;
- LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
- int i = 0;
- for (Collection<User> cluster1 : clusters) {
- i++;
- ListIterator<Collection<User>> it2 = clusters.listIterator(i);
- while (it2.hasNext()) {
- Collection<User> cluster2 = it2.next();
- double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
- if (!Double.isNaN(similarity) &&
- (!full || similarity > queue.getLast().getSimilarity())) {
- ListIterator<ClusterClusterPair> queueIterator =
- queue.listIterator(queue.size());
- while (queueIterator.hasPrevious()) {
- if (similarity <= queueIterator.previous().getSimilarity()) {
- queueIterator.next();
- break;
- }
- }
- queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
- if (full) {
- queue.removeLast();
- } else if (queue.size() > numUsers) { // use numUsers as queue size limit
- full = true;
- queue.removeLast();
- }
- }
- }
- }
+ LinkedList<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);
// The first one is definitely the closest pair in existence so we can cluster
// the two together, put it back into the set of clusters, and start again. Instead
@@ -435,6 +408,40 @@
return done;
}
+ private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<User>> clusters)
+ throws TasteException {
+ boolean full = false;
+ LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
+ int i = 0;
+ for (Collection<User> cluster1 : clusters) {
+ i++;
+ ListIterator<Collection<User>> it2 = clusters.listIterator(i);
+ while (it2.hasNext()) {
+ Collection<User> cluster2 = it2.next();
+ double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
+ if (!Double.isNaN(similarity) &&
+ (!full || similarity > queue.getLast().getSimilarity())) {
+ ListIterator<ClusterClusterPair> queueIterator =
+ queue.listIterator(queue.size());
+ while (queueIterator.hasPrevious()) {
+ if (similarity <= queueIterator.previous().getSimilarity()) {
+ queueIterator.next();
+ break;
+ }
+ }
+ queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
+ if (full) {
+ queue.removeLast();
+ } else if (queue.size() > numUsers) { // use numUsers as queue size limit
+ full = true;
+ queue.removeLast();
+ }
+ }
+ }
+ }
+ return queue;
+ }
+
private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)
throws TasteException {
Map<Object, List<RecommendedItem>> recsPerUser = new FastMap<Object, List<RecommendedItem>>();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sun Aug 31 09:25:09 2008
@@ -287,8 +287,8 @@
String label = cmdLine.getOptionValue(labelOpt.getOpt());
Analyzer analyzer;
if (cmdLine.hasOption(analyzerOpt.getOpt())) {
- analyzer = (Analyzer) Class.forName(
- cmdLine.getOptionValue(analyzerOpt.getOpt())).newInstance();
+ analyzer = Class.forName(
+ cmdLine.getOptionValue(analyzerOpt.getOpt())).asSubclass(Analyzer.class).newInstance();
} else {
analyzer = new StandardAnalyzer();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Sun Aug 31 09:25:09 2008
@@ -44,6 +44,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.nio.charset.Charset;
public class Classify {
@@ -85,8 +86,8 @@
log.info("Loading model from: {}", modelPaths);
- Model model = null;
- Classifier classifier = null;
+ Model model;
+ Classifier classifier;
String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
@@ -98,6 +99,8 @@
log.info("Testing Complementary Bayes Classifier");
model = new CBayesModel();
classifier = new CBayesClassifier();
+ } else {
+ throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
}
model = reader.loadModel(model, fs, modelPaths, conf);
@@ -119,8 +122,7 @@
Analyzer analyzer = null;
if (cmdLine.hasOption(analyzerOpt.getOpt())) {
String className = cmdLine.getOptionValue(analyzerOpt.getOpt());
- Class clazz = Class.forName(className);
- analyzer = (Analyzer) clazz.newInstance();
+ analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
}
if (analyzer == null) {
analyzer = new StandardAnalyzer();
@@ -134,7 +136,7 @@
}
log.info("Converting input document to proper format");
- String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), encoding));
+ String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
StringBuilder line = new StringBuilder();
for(String token : document)
{
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java Sun Aug 31 09:25:09 2008
@@ -25,6 +25,7 @@
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
+import java.util.Deque;
/**
@@ -54,7 +55,7 @@
}
}
- LinkedList<ClassifierResult> result = new LinkedList<ClassifierResult>();
+ Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
while ((tmp = (ClassifierResult) pq.pop()) != null) {
result.addLast(tmp);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java Sun Aug 31 09:25:09 2008
@@ -45,7 +45,7 @@
float numerator = result + alpha_i;
float denominator =(sumLabelWeight + vocabCount);
- float weight = new Double(Math.log(numerator /denominator)).floatValue();
+ float weight = (float) Math.log(numerator /denominator);
result = (-1.0f * (weight));
return result;
@@ -98,7 +98,7 @@
float D_ij = getWeightUnprocessed(label, feature);
float sumLabelWeight = getSumLabelWeight(label);
// TODO srowen says sigma_j is unused
- float sigma_j = getSumFeatureWeight(feature);
+ //float sigma_j = getSumFeatureWeight(feature);
float numerator = D_ij + alpha_i;
float denominator = sumLabelWeight + vocabCount;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureDriver.java Sun Aug 31 09:25:09 2008
@@ -87,7 +87,7 @@
dfs.delete(outPath, true);
DefaultStringifier<Integer> intStringifier = new DefaultStringifier<Integer>(conf, Integer.class);
- String gramSizeString = intStringifier.toString(new Integer(gramSize));
+ String gramSizeString = intStringifier.toString(Integer.valueOf(gramSize));
Integer retGramSize = intStringifier.fromString(gramSizeString);
log.info("{}", retGramSize);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java Sun Aug 31 09:25:09 2008
@@ -26,14 +26,12 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.classifier.BayesFileFormatter;
-import org.apache.mahout.common.Model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-import java.util.List;
/**
* Reads the input train set(preprocessed using the {@link BayesFileFormatter}).
@@ -66,7 +64,7 @@
public void map(Text key, Text value,
OutputCollector<Text, FloatWritable> output, Reporter reporter)
throws IOException {
- String line = value.toString();
+ //String line = value.toString();
String label = key.toString();
int keyLen = label.length();
@@ -77,7 +75,7 @@
builder.ensureCapacity(32);// make sure we have a reasonably size buffer to
// begin with
// TODO: srowen says this var isn't used right now
- List<String> previousN_1Grams = Model.generateNGramsWithoutLabel(line, keyLen);
+ //List<String> previousN_1Grams = Model.generateNGramsWithoutLabel(line, keyLen);
double lengthNormalisation = 0.0;
for (double D_kj : wordList.values()) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java Sun Aug 31 09:25:09 2008
@@ -201,7 +201,7 @@
// the key is either _label_ or label,feature
while (reader.next(key, value)) {
String keyStr = key.toString();
-
+ // TODO srowen says we should probably collapse these empty branches?
if (keyStr.startsWith("_")) {
} else if (keyStr.startsWith(",")) {
@@ -211,8 +211,9 @@
} else {
int idx = keyStr.indexOf(",");
if (idx != -1) {
+ // TODO srowen says data is not used?
Map<String, Float> data = new HashMap<String, Float>();
- data.put(keyStr.substring(0, idx), new Float(value.get()));
+ data.put(keyStr.substring(0, idx), value.get());
writer.append(new Text(key.toString()), value);
}
}
@@ -237,7 +238,7 @@
while (reader.next(key, value)) {
String keyStr = key.toString();
if (keyStr.startsWith("_")) { // Sum of weights of labels
- labelSum.put(keyStr.substring(1), new Float(value.get()));
+ labelSum.put(keyStr.substring(1), value.get());
}
}
@@ -260,7 +261,7 @@
while (reader.next(key, value)) {
String keyStr = key.toString();
if (keyStr.startsWith("_")) { // Count of Documents in a Label
- labelDocumentCounts.put(keyStr.substring(1), new Float(value.get()));
+ labelDocumentCounts.put(keyStr.substring(1), value.get());
}
}
@@ -285,7 +286,7 @@
if (weightSum.size() > 1) {
throw new IOException("Incorrect Sum File");
} else if (keyStr.startsWith("*")) {
- weightSum.put(keyStr, new Float(value.get()));
+ weightSum.put(keyStr, value.get());
}
}
@@ -311,7 +312,7 @@
throw new IOException("Incorrect vocabCount File");
}
if (keyStr.startsWith("*")) {
- weightSum.put(keyStr, new Float(value.get()));
+ weightSum.put(keyStr, value.get());
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesClassifier.java Sun Aug 31 09:25:09 2008
@@ -25,6 +25,7 @@
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
+import java.util.Deque;
/**
* Classifies documents based on a {@link CBayesModel}.
@@ -52,7 +53,7 @@
}
}
- LinkedList<ClassifierResult> result = new LinkedList<ClassifierResult>();
+ Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
while ((tmp = (ClassifierResult) pq.pop()) != null) {
result.addLast(tmp);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesModel.java Sun Aug 31 09:25:09 2008
@@ -42,7 +42,7 @@
float numerator = sigma_j - result + alpha_i;
float denominator =(sigma_jSigma_k - sumLabelWeight + vocabCount);
- float weight = new Double(Math.log(numerator /denominator)).floatValue();
+ float weight = (float) Math.log(numerator /denominator);
result = (-1.0f * (weight / getThetaNormalizer(label)));
return result;
}
@@ -81,7 +81,7 @@
log.info("{}", thetaNormalizer);
/*for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
- thetaNormalizer.put(label, new Float(0));
+ thetaNormalizer.put(label, 0.0f);
}
for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Sun Aug 31 09:25:09 2008
@@ -250,7 +250,7 @@
String id = formattedString.substring(0, beginIndex);
String centroid = formattedString.substring(beginIndex);
if (id.startsWith("C")) {
- int canopyId = new Integer(formattedString.substring(1, beginIndex - 2));
+ int canopyId = Integer.parseInt(formattedString.substring(1, beginIndex - 2));
Vector canopyCentroid = AbstractVector.decodeVector(centroid);
return new Canopy(canopyCentroid, canopyId);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Sun Aug 31 09:25:09 2008
@@ -39,8 +39,8 @@
String clusters = args[index++];
String output = args[index++];
String measureClass = args[index++];
- double convergenceDelta = new Double(args[index++]);
- int maxIterations = new Integer(args[index++]);
+ double convergenceDelta = Double.parseDouble(args[index++]);
+ int maxIterations = Integer.parseInt(args[index++]);
int numMapTasks = Integer.parseInt(args[index++]);
boolean doCanopy = Boolean.parseBoolean(args[index++]);
int m = Integer.parseInt(args[index++]);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Sun Aug 31 09:25:09 2008
@@ -85,7 +85,6 @@
* Configure the mapper with the cluster info
*
* @param job
- * @param clusters
*/
protected void configureWithClusterInfo(JobConf job) {
// Get the path location where the cluster Info is stored
@@ -116,7 +115,7 @@
try {
Text key = new Text();
Text value = new Text();
- int counter = 1;
+ //int counter = 1;
while (reader.next(key, value)) {
// get the cluster info
SoftCluster cluster = SoftCluster.decodeCluster(value.toString());
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Sun Aug 31 09:25:09 2008
@@ -50,7 +50,7 @@
private static int nextClusterId = 0;
// this cluster's clusterId
- private int clusterId;
+ private final int clusterId;
// the current center
private Vector center = new SparseVector(0);
@@ -92,10 +92,8 @@
String id = formattedString.substring(0, beginIndex);
String center = formattedString.substring(beginIndex);
if (id.startsWith("C") || id.startsWith("V")) {
- int clusterId = new Integer(formattedString.substring(1, beginIndex - 2));
- Vector clusterCenter = null;
-
- clusterCenter = AbstractVector.decodeVector(center);
+ int clusterId = Integer.parseInt(formattedString.substring(1, beginIndex - 2));
+ Vector clusterCenter = AbstractVector.decodeVector(center);
SoftCluster cluster = new SoftCluster(clusterCenter, clusterId);
cluster.converged = id.startsWith("V");
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sun Aug 31 09:25:09 2008
@@ -87,8 +87,7 @@
*/
public static void configure(JobConf job) {
try {
- Class<?> cl = Class.forName(job.get(DISTANCE_MEASURE_KEY));
- measure = (DistanceMeasure) cl.newInstance();
+ measure = Class.forName(job.get(DISTANCE_MEASURE_KEY)).asSubclass(DistanceMeasure.class).newInstance();
measure.configure(job);
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Sun Aug 31 09:25:09 2008
@@ -30,7 +30,6 @@
/**
* The Base Model Class. Currently there are some Bayes Model elements which have to be refactored out later.
- *
*/
public abstract class Model {
@@ -50,9 +49,9 @@
protected final Map<Integer, Float> thetaNormalizer = new HashMap<Integer, Float>();
- protected Float sigma_jSigma_k = new Float(0);
+ protected float sigma_jSigma_k = 0.0f;
- protected final Float alpha_i = 1.0f; // alpha_i can be improved upon for increased smoothing
+ protected final float alpha_i = 1.0f; // alpha_i can be improved upon for increased smoothing
protected abstract float FeatureWeight(Integer label, Integer feature);
@@ -119,23 +118,23 @@
if (featureLabelWeights.size() <= feature) {
throw new IllegalStateException("This should not happen");
}
- featureLabelWeights.get(feature).put(label, new Float(weight));
+ featureLabelWeights.get(feature).put(label, weight);
}
protected void setSumFeatureWeight(Integer feature, float sum) {
if (sumFeatureWeight.size() != feature)
throw new IllegalStateException("This should not happen");
- sumFeatureWeight.add(feature, new Float(sum));
+ sumFeatureWeight.add(feature, sum);
}
protected void setSumLabelWeight(Integer label, float sum) {
if (sumLabelWeight.size() != label)
throw new IllegalStateException("This should not happen");
- sumLabelWeight.put(label, new Float(sum));
+ sumLabelWeight.put(label, sum);
}
protected void setThetaNormalizer(Integer label, float sum) {
- thetaNormalizer.put(label, new Float(sum));
+ thetaNormalizer.put(label, sum);
}
public void initializeWeightMatrix() {
@@ -145,7 +144,7 @@
featureLabelWeights.add(new HashMap<Integer, Float>(1));
}
- public void setSigma_jSigma_k(Float sigma_jSigma_k) {
+ public void setSigma_jSigma_k(float sigma_jSigma_k) {
this.sigma_jSigma_k = sigma_jSigma_k;
}
@@ -203,8 +202,7 @@
StringBuilder gramBuilder = new StringBuilder();
- for(String gram: previousN_1Grams)
- {
+ for (String gram : previousN_1Grams) {
gramBuilder.append(gram);
String token = gramBuilder.toString();
tokens.add(token);
@@ -232,12 +230,11 @@
StringBuilder gramBuilder = new StringBuilder();
- for(String gram: previousN_1Grams)
- {
+ for (String gram : previousN_1Grams) {
gramBuilder.append(gram);
String token = gramBuilder.toString();
tokens.add(token);
- gramBuilder.append(" ");
+ gramBuilder.append(' ');
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Sun Aug 31 09:25:09 2008
@@ -213,7 +213,7 @@
String vectorClassName = new String(buf);
Vector vector;
try {
- vector = (Vector) Class.forName(vectorClassName).newInstance();
+ vector = Class.forName(vectorClassName).asSubclass(Vector.class).newInstance();
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
} catch (IllegalAccessException e) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/LoadTest.java Sun Aug 31 09:25:09 2008
@@ -63,7 +63,12 @@
private static final int NUM_PREFS = 20;
private static final int NUM_THREADS = 4;
- private final Random random = RandomUtils.getRandom();
+ private Random random;
+
+ public void setUp() throws Exception {
+ super.setUp();
+ random = RandomUtils.getRandom();
+ }
public void testSlopeOneLoad() throws Exception {
DataModel model = createModel();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/IteratorUtilsTest.java Sun Aug 31 09:25:09 2008
@@ -17,15 +17,15 @@
package org.apache.mahout.cf.taste.impl.common;
-import junit.framework.TestCase;
-
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;
-public final class IteratorUtilsTest extends TestCase {
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+
+public final class IteratorUtilsTest extends TasteTestCase {
private static final List<String> TEST_DATA;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java Sun Aug 31 09:25:09 2008
@@ -17,8 +17,8 @@
package org.apache.mahout.cf.taste.impl.common;
-import junit.framework.TestCase;
import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
import java.util.Collection;
import java.util.HashSet;
@@ -26,7 +26,7 @@
/**
* Tests {@link RefreshHelper}
*/
-public final class RefreshHelperTest extends TestCase {
+public final class RefreshHelperTest extends TasteTestCase {
public void testCallable() {
MockRefreshable mock = new MockRefreshable();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluatorTest.java Sun Aug 31 09:25:09 2008
@@ -37,7 +37,7 @@
RecommenderEvaluator evaluator =
new AverageAbsoluteDifferenceRecommenderEvaluator();
double eval = evaluator.evaluate(builder, model, 0.75, 1.0);
- assertEquals(0.26387685767414826, eval, EPSILON);
+ assertEquals(0.12387145540086855, eval, EPSILON);
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluatorTest.java Sun Aug 31 09:25:09 2008
@@ -36,7 +36,7 @@
};
RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();
double eval = evaluator.evaluate(builder, model, 0.75, 1.0);
- assertEquals(0.26387685767414826, eval, EPSILON);
+ assertEquals(0.15553128261991583, eval, EPSILON);
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorerTest.java Sun Aug 31 09:25:09 2008
@@ -17,9 +17,9 @@
package org.apache.mahout.cf.taste.impl.recommender;
-import junit.framework.TestCase;
import org.apache.mahout.cf.taste.impl.model.GenericItem;
import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
@@ -30,7 +30,7 @@
/**
* <p>Tests {@link NullRescorer}.</p>
*/
-public final class NullRescorerTest extends TestCase {
+public final class NullRescorerTest extends TasteTestCase {
public void testItemRescorer() throws Exception {
Rescorer<Item> rescorer = NullRescorer.getItemInstance();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java Sun Aug 31 09:25:09 2008
@@ -24,9 +24,11 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.io.Writer;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
public class BayesFileFormatterTest extends TestCase {
@@ -52,7 +54,7 @@
words = new String[]{"dog", "cat", "fish", "snake", "zebra"};
for (String word : words) {
File file = new File(input, word);
- FileWriter writer = new FileWriter(file);
+ Writer writer = new OutputStreamWriter(new FileOutputStream(file), Charset.forName("UTF-8"));
writer.write(word);
writer.close();
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Sun Aug 31 09:25:09 2008
@@ -37,14 +37,17 @@
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.nio.charset.Charset;
public class TestCanopyCreation extends TestCase {
static final double[][] raw = { { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 },
@@ -144,11 +147,11 @@
private void writePointsToFileWithPayload(List<Vector> points,
String fileName, String payload) throws IOException {
- BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+ BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
for (Vector point : points) {
output.write(point.asFormatString());
output.write(payload);
- output.write("\n");
+ output.write('\n');
}
output.flush();
output.close();
@@ -674,8 +677,8 @@
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/clusters/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/clusters/part-00000"), Charset.forName("UTF-8")));
int count = 0;
while (reader.ready()) {
System.out.println(reader.readLine());
@@ -702,8 +705,8 @@
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/clusters/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/clusters/part-00000"), Charset.forName("UTF-8")));
int count = 0;
while (reader.ready()) {
System.out.println(reader.readLine());
@@ -732,8 +735,8 @@
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/clusters/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/clusters/part-00000"), Charset.forName("UTF-8")));
int count = 0;
while (reader.ready()) {
String line = reader.readLine();
@@ -764,8 +767,8 @@
// now run the Job
CanopyClusteringJob.runJob("testdata", "output",
EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/clusters/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/clusters/part-00000"), Charset.forName("UTF-8")));
int count = 0;
while (reader.ready()) {
String line = reader.readLine();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Sun Aug 31 09:25:09 2008
@@ -18,12 +18,13 @@
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.nio.charset.Charset;
import junit.framework.TestCase;
@@ -63,7 +64,7 @@
// Shift the decimal the correct number of places
// to the right.
- val = val * factor;
+ val *= factor;
// Round to the nearest integer.
long tmp = Math.round(val);
@@ -88,6 +89,7 @@
DistanceMeasure measure = (DistanceMeasure) cl.newInstance();
SoftCluster.config(measure, threshold);
boolean converged = false;
+ // TODO srowen notes that converged is always false?
for (int iter = 0; !converged && iter < numIter; iter++) {
iterateReference(points, clusterList, measure);
}
@@ -158,14 +160,9 @@
// run reference FuzzyKmeans algorithm
referenceFuzzyKMeans(points, clusterList, pointClusterInfo,
EuclideanDistanceMeasure.class.getName(), 0.001, 2);
- Iterator<Map.Entry<String, String>> iterator = pointClusterInfo
- .entrySet().iterator();
// iterate for each point
- while (iterator.hasNext()) {
- Map.Entry<String, String> entry = iterator.next();
- String value = entry.getValue();
-
+ for (String value : pointClusterInfo.values()) {
String clusterInfoStr = value.substring(1, value.length() - 1);
String[] clusterInfoList = clusterInfoStr.split(" ");
assertEquals("Number of clusters", k + 1, clusterInfoList.length);
@@ -232,8 +229,8 @@
assertTrue("output dir exists?", outDir.exists());
String[] outFiles = outDir.list();
assertEquals("output dir files?", 4, outFiles.length);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/points/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/points/part-00000"), Charset.forName("UTF-8")));
while (reader.ready()) {
String line = reader.readLine();
@@ -298,7 +295,8 @@
Map<String, Double> pointTotalProbMap = new HashMap<String, Double>();
for (String key : mapCollector.getKeys()) {
- SoftCluster cluster = SoftCluster.decodeCluster(key);
+ //SoftCluster cluster = SoftCluster.decodeCluster(key);
+ // TODO srowen says cluster is not used?
List<Text> values = mapCollector.getValue(key);
for (Text value : values) {
@@ -318,11 +316,8 @@
pointTotalProbMap.put(encodedVector, probVal + pointProb);
}
}
- Iterator<Map.Entry<String, Double>> iterator = pointTotalProbMap
- .entrySet().iterator();
- while (iterator.hasNext()) {
- Map.Entry<String, Double> entry = iterator.next();
+ for (Map.Entry<String, Double> entry : pointTotalProbMap.entrySet()) {
String key = entry.getKey();
double value = round(entry.getValue(), 1);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Sun Aug 31 09:25:09 2008
@@ -37,11 +37,14 @@
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
+import java.nio.charset.Charset;
public class TestKmeansClustering extends TestCase {
@@ -264,13 +267,13 @@
assertEquals("too many values", 1, values.size());
String value = values.get(0).toString();
int ix = value.indexOf(",");
- count += new Integer(value.substring(0, ix));
+ count += Integer.parseInt(value.substring(0, ix));
total = total
.plus(AbstractVector.decodeVector(value.substring(ix + 2)));
}
assertEquals("total points", 9, count);
- assertEquals("point total[0]", 27, (new Double(total.get(0))).intValue());
- assertEquals("point total[1]", 27, (new Double(total.get(1))).intValue());
+ assertEquals("point total[0]", 27, (int) total.get(0));
+ assertEquals("point total[1]", 27, (int) total.get(1));
}
}
@@ -399,8 +402,8 @@
assertTrue("output dir exists?", outDir.exists());
String[] outFiles = outDir.list();
assertEquals("output dir files?", 4, outFiles.length);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/points/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/points/part-00000"), Charset.forName("UTF-8")));
int[] expect = expectedNumPoints[k];
DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
while (reader.ready()) {
@@ -451,8 +454,8 @@
assertTrue("output dir exists?", outDir.exists());
String[] outFiles = outDir.list();
assertEquals("output dir files?", 4, outFiles.length);
- BufferedReader reader = new BufferedReader(new FileReader(
- "output/points/part-00000"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
+ "output/points/part-00000"), Charset.forName("UTF-8")));
DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
while (reader.ready()) {
String line = reader.readLine();
@@ -468,11 +471,11 @@
public static void writePointsToFileWithPayload(List<Vector> points,
String fileName, String payload) throws IOException {
- BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+ BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
for (Vector point : points) {
output.write(point.asFormatString());
output.write(payload);
- output.write("\n");
+ output.write('\n');
}
output.flush();
output.close();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sun Aug 31 09:25:09 2008
@@ -33,12 +33,14 @@
import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.nio.charset.Charset;
public class TestMeanShift extends TestCase {
@@ -73,11 +75,11 @@
*/
private void writePointsToFileWithPayload(Vector[] points, String fileName,
String payload) throws IOException {
- BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
+ BufferedWriter output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8")));
for (Vector point : points) {
output.write(new MeanShiftCanopy(point).toString());
output.write(payload);
- output.write("\n");
+ output.write('\n');
}
output.flush();
output.close();
Modified: lucene/mahout/trunk/core/taste-build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/taste-build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/core/taste-build.xml (original)
+++ lucene/mahout/trunk/core/taste-build.xml Sun Aug 31 09:25:09 2008
@@ -26,14 +26,15 @@
<property name="axis.jars" location="lib/axis/"/>
<property name="dbcp.jar" location="lib/commons-dbcp-1.2.2.jar"/>
- <property name="junit.jar" location="lib/junit-3.8.2.jar"/>
+ <property name="junit.jar" location="../lib/junit-3.8.2.jar"/>
<property name="pool.jar" location="lib/commons-pool-1.4.jar"/>
<property name="proguard.jar" location="lib/proguard.jar"/>
<property name="servlet.jar" location="lib/servlet-api.jar"/>
- <property name="hadoop.jar" location="lib/hadoop-0.17.1-core.jar"/>
+ <property name="hadoop.jar" location="lib/hadoop-0.18.0-core.jar"/>
<property name="slf4j-api.jar" location="lib/slf4j-api-1.5.2.jar"/>
<property name="slf4j.jar" location="lib/slf4j-jcl-1.5.2.jar"/>
<property name="commons-logging.jar" location="lib/commons-logging-1.1.1.jar"/>
+ <property name="uncommons-math.jar" location="lib/uncommons-maths-1.0.2.jar"/>
<property environment="env"/>
<property name="name" value="${ant.project.name}"/>
@@ -48,8 +49,8 @@
<target depends="init" name="build" description="Compiles all code">
<mkdir dir="build"/>
- <javac source="1.5"
- target="1.5"
+ <javac source="6"
+ target="6"
deprecation="true"
destdir="build"
debug="true"
@@ -62,6 +63,7 @@
<pathelement location="${pool.jar}"/>
<pathelement location="${hadoop.jar}"/>
<pathelement location="${slf4j-api.jar}"/>
+ <pathelement location="${uncommons-math.jar}"/>
</classpath>
<include name="org/apache/mahout/cf/taste/**"/>
</javac>
@@ -94,7 +96,7 @@
<arg value="-dontobfuscate"/>
<arg value="-optimizationpasses 6"/>
<arg value="-keep public class * { public protected *; }"/>
- <arg value="-target 5"/>
+ <arg value="-target 6"/>
<arg value="-verbose"/>
</java>
<delete file="temp.jar"/>
@@ -102,8 +104,8 @@
<target depends="build" name="build-test" description="Builds test classes">
<mkdir dir="build-test"/>
- <javac source="1.5"
- target="1.5"
+ <javac source="6"
+ target="6"
debug="true"
debuglevel="lines,vars,source"
deprecation="true"
@@ -157,8 +159,8 @@
</not>
</condition>
</fail>
- <javac source="1.5"
- target="1.5"
+ <javac source="6"
+ target="6"
deprecation="true"
debug="true"
optimize="true"
@@ -204,6 +206,7 @@
<pathelement location="${slf4j-api.jar}"/>
<pathelement location="${slf4j.jar}"/>
<pathelement location="${commons-logging.jar}"/>
+ <pathelement location="${uncommons-math.jar}"/>
<pathelement location="${release-jar}"/>
<pathelement location="build-test"/>
</classpath>
Modified: lucene/mahout/trunk/examples/build.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/build.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/build.xml (original)
+++ lucene/mahout/trunk/examples/build.xml Sun Aug 31 09:25:09 2008
@@ -30,7 +30,7 @@
</tstamp>
<!-- Java Version we are compatible with -->
- <property name="java.compat.version" value="1.5" />
+ <property name="java.compat.version" value="6" />
<!-- Mahout Implementation Version -->
<!--
@@ -78,7 +78,7 @@
<property name="build.javadoc" value="${build.docs}/api"/>
<property name="javadoc.access" value="protected"/>
<property name="javadoc.link.java"
- value="http://java.sun.com/j2se/1.5.0/docs/api/"/>
+ value="http://java.sun.com/javase/6/docs/api/"/>
<property name="javadoc.link.junit"
value="http://junit.sourceforge.net/javadoc/"/>
<property name="javadoc.link.lucene"
Modified: lucene/mahout/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/pom.xml?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/pom.xml (original)
+++ lucene/mahout/trunk/examples/pom.xml Sun Aug 31 09:25:09 2008
@@ -25,8 +25,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<encoding>UTF-8</encoding>
- <source>1.5</source>
- <target>1.5</target>
+ <source>6</source>
+ <target>6</target>
</configuration>
</plugin>
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Sun Aug 31 09:25:09 2008
@@ -26,13 +26,14 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
+import java.io.OutputStreamWriter;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.nio.charset.Charset;
public final class GroupLensDataModel extends FileDataModel {
@@ -75,7 +76,7 @@
if (!resultFile.exists()) {
PrintWriter writer = null;
try {
- writer = new PrintWriter(new FileWriter(resultFile));
+ writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
for (String line : new FileLineIterable(originalFile)) {
String convertedLine;
if (ratings) {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Sun Aug 31 09:25:09 2008
@@ -98,8 +98,8 @@
log.info("Loading model from: {}", modelPaths);
- Model model = null;
- Classifier classifier = null;
+ Model model;
+ Classifier classifier;
String classifierType = cmdLine.getOptionValue(typeOpt.getOpt());
@@ -111,6 +111,8 @@
log.info("Testing Complementary Bayes Classifier");
model = new CBayesModel();
classifier = new CBayesClassifier();
+ } else {
+ throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
}
model = reader.loadModel(model, fs, modelPaths, conf);
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java?rev=690720&r1=690719&r2=690720&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/FileInfosDatasetTest.java Sun Aug 31 09:25:09 2008
@@ -23,12 +23,11 @@
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
-import java.io.FileReader;
import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.nio.charset.Charset;
-/**
- *
- */
public class FileInfosDatasetTest extends TestCase {
public void testRanges() throws IOException {
@@ -39,7 +38,7 @@
DataSet.initialize(dataset);
String filename = "build/test-classes/wdbc/wdbc.data";
- BufferedReader in = new BufferedReader(new FileReader(filename));
+ BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), Charset.forName("UTF-8")));
String line;
DataLine dl = new DataLine();