You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/10 11:35:28 UTC
svn commit: r792856 [4/13] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/common/
main/java/org/apache/mahout/cf/taste/eval/
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/impl/common/ main/j...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Fri Jul 10 09:35:19 2009
@@ -20,11 +20,11 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
@@ -47,24 +47,21 @@
import java.util.concurrent.locks.ReentrantLock;
/**
- * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters
- * {@link org.apache.mahout.cf.taste.model.User}s, then determines
- * the clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters
- * until only a certain number remain, meaning that each cluster is sort of a tree of other clusters.</p>
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link
+ * org.apache.mahout.cf.taste.model.User}s, then determines the clusters' top recommendations. This implementation
+ * builds clusters by repeatedly merging clusters until only a certain number remain, meaning that each cluster is sort
+ * of a tree of other clusters.</p>
*
- * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p>
- * <ul>
+ * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p> <ul>
* <li>For all {@link org.apache.mahout.cf.taste.model.User}s in a cluster, recommendations will be the same</li>
- * <li>{@link #estimatePreference(Object, Object)} may well return {@link Double#NaN}; it does so when asked
- * to estimate preference for an {@link org.apache.mahout.cf.taste.model.Item} for which no preference is expressed in the
- * {@link org.apache.mahout.cf.taste.model.User}s in the cluster.</li>
- * </ul>
+ * <li>{@link #estimatePreference(Object, Object)} may well return {@link Double#NaN}; it does so when asked to estimate
+ * preference for an {@link org.apache.mahout.cf.taste.model.Item} for which no preference is expressed in the {@link
+ * org.apache.mahout.cf.taste.model.User}s in the cluster.</li> </ul>
*
- * <p>This is an <em>experimental</em> implementation which tries to gain a lot of speed at the cost of
- * accuracy in building clusters, compared to {@link org.apache.mahout.cf.taste.impl.recommender.TreeClusteringRecommender}.
- * It will sometimes cluster two other clusters together that may not be the exact closest two clusters
- * in existence. This may not affect the recommendation quality much, but it potentially speeds up the
- * clustering process dramatically.</p>
+ * <p>This is an <em>experimental</em> implementation which tries to gain a lot of speed at the cost of accuracy in
+ * building clusters, compared to {@link org.apache.mahout.cf.taste.impl.recommender.TreeClusteringRecommender}. It will
+ * sometimes cluster two other clusters together that may not be the exact closest two clusters in existence. This may
+ * not affect the recommendation quality much, but it potentially speeds up the clustering process dramatically.</p>
*/
public final class TreeClusteringRecommender2 extends AbstractRecommender implements ClusteringRecommender {
@@ -82,12 +79,12 @@
private final RefreshHelper refreshHelper;
/**
- * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provides {@link org.apache.mahout.cf.taste.model.User}s
+ * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provides {@link
+ * org.apache.mahout.cf.taste.model.User}s
* @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
- * cluster similarity
- * @param numClusters desired number of clusters to create
- * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is
- * less than 2
+ * cluster similarity
+ * @param numClusters desired number of clusters to create
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
*/
public TreeClusteringRecommender2(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
@@ -116,13 +113,14 @@
}
/**
- * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provides {@link org.apache.mahout.cf.taste.model.User}s
- * @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
- * cluster similarity
- * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger
- * clusters until the next two nearest clusters' similarity drops below this threshold
- * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is
- * {@link Double#NaN}
+ * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provides {@link
+ * org.apache.mahout.cf.taste.model.User}s
+ * @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
+ * cluster similarity
+ * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
+ * the next two nearest clusters' similarity drops below this threshold
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is {@link
+ * Double#NaN}
*/
public TreeClusteringRecommender2(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
@@ -152,7 +150,7 @@
@Override
public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
- throws TasteException {
+ throws TasteException {
if (userID == null) {
throw new IllegalArgumentException("userID is null");
}
@@ -271,8 +269,8 @@
}
ClusterClusterPair other = (ClusterClusterPair) o;
return cluster1.equals(other.cluster1) &&
- cluster2.equals(other.cluster2) &&
- similarity == other.similarity;
+ cluster2.equals(other.cluster2) &&
+ similarity == other.similarity;
}
@Override
@@ -327,7 +325,7 @@
}
}
- private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done)
+ private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done)
throws TasteException {
// We find a certain number of closest clusters...
LinkedList<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);
@@ -425,7 +423,7 @@
if (!Double.isNaN(similarity) &&
(!full || similarity > queue.getLast().getSimilarity())) {
ListIterator<ClusterClusterPair> queueIterator =
- queue.listIterator(queue.size());
+ queue.listIterator(queue.size());
while (queueIterator.hasPrevious()) {
if (similarity <= queueIterator.previous().getSimilarity()) {
queueIterator.next();
@@ -446,7 +444,7 @@
}
private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)
- throws TasteException {
+ throws TasteException {
Map<Object, List<RecommendedItem>> recsPerUser = new FastMap<Object, List<RecommendedItem>>();
for (Collection<User> cluster : clusters) {
List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
@@ -458,7 +456,7 @@
}
private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
- throws TasteException {
+ throws TasteException {
Collection<Item> allItems = new FastSet<Item>();
for (User user : cluster) {
@@ -471,7 +469,7 @@
TopItems.Estimator<Item> estimator = new Estimator(cluster);
List<RecommendedItem> topItems =
- TopItems.getTopItems(Integer.MAX_VALUE, allItems, null, estimator);
+ TopItems.getTopItems(Integer.MAX_VALUE, allItems, null, estimator);
log.debug("Recommendations are: {}", topItems);
return Collections.unmodifiableList(topItems);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java Fri Jul 10 09:35:19 2009
@@ -126,6 +126,6 @@
}
return x;
- }
+ }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java Fri Jul 10 09:35:19 2009
@@ -18,26 +18,25 @@
package org.apache.mahout.cf.taste.impl.recommender.knn;
import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.common.Pair;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
-import org.apache.mahout.cf.taste.impl.common.Pair;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import java.util.ArrayList;
-import java.util.List;
import java.util.Collection;
+import java.util.List;
/**
- * <p>The weights to compute the final predicted preferences are calculated
- * using linear interpolation, through an {@link Optimizer}. This algorithm is based on
- * the paper of Robert M. Bell and Yehuda Koren in ICDM '07.</p>
+ * <p>The weights to compute the final predicted preferences are calculated using linear interpolation, through an
+ * {@link Optimizer}. This algorithm is based on the paper of Robert M. Bell and Yehuda Koren in ICDM '07.</p>
*/
public final class KnnItemBasedRecommender extends GenericItemBasedRecommender {
@@ -117,14 +116,14 @@
protected double doEstimatePreference(User theUser, Item item) throws TasteException {
Collection<Item> allItems = new FastSet<Item>();
- for(Preference pref: theUser.getPreferencesAsArray()){
+ for (Preference pref : theUser.getPreferencesAsArray()) {
allItems.add(pref.getItem());
}
allItems.remove(item);
List<RecommendedItem> mostSimilar = mostSimilarItems(item.getID(), allItems, neighborhoodSize, null);
List<Item> theNeighborhood = new ArrayList<Item>(mostSimilar.size());
- for (RecommendedItem rec: mostSimilar) {
+ for (RecommendedItem rec : mostSimilar) {
theNeighborhood.add(rec.getItem());
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java Fri Jul 10 09:35:19 2009
@@ -20,9 +20,8 @@
import java.util.Arrays;
/**
- * Non-negative Quadratic Optimization.
- * Based on the paper of Robert M. Bell and Yehuda Koren in ICDM '07.
- * Thanks to Dan Tillberg for the hints in the implementation.
+ * Non-negative Quadratic Optimization. Based on the paper of Robert M. Bell and Yehuda Koren in ICDM '07. Thanks to Dan
+ * Tillberg for the hints in the implementation.
*/
public final class NonNegativeQuadraticOptimizer implements Optimizer {
@@ -104,6 +103,6 @@
} while (rdot > 0.1);
return x;
- }
+ }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java Fri Jul 10 09:35:19 2009
@@ -23,12 +23,12 @@
import org.apache.mahout.cf.taste.impl.common.CompactRunningAverage;
import org.apache.mahout.cf.taste.impl.common.CompactRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
@@ -46,8 +46,8 @@
import java.util.concurrent.locks.ReentrantReadWriteLock;
/**
- * <p>An implementation of {@link DiffStorage} that merely stores item-item diffs in memory.
- * It is fast, but can consume a great deal of memory.</p>
+ * <p>An implementation of {@link DiffStorage} that merely stores item-item diffs in memory. It is fast, but can consume
+ * a great deal of memory.</p>
*/
public final class MemoryDiffStorage implements DiffStorage {
@@ -66,29 +66,24 @@
/**
* <p>Creates a new {@link MemoryDiffStorage}.</p>
*
- * <p>See {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender} for the
- * meaning of <code>stdDevWeighted</code>. If <code>compactAverages</code>
- * is set, this uses alternate data structures ({@link CompactRunningAverage} versus
- * {@link FullRunningAverage}) that use almost 50% less memory but store item-item
- * averages less accurately. <code>maxEntries</code> controls the maximum number of item-item average
- * preference differences that will be tracked internally. After the limit is reached,
- * if a new item-item pair is observed in the data it will be ignored. This is recommended for large datasets.
- * The first <code>maxEntries</code>
- * item-item pairs observed in the data are tracked. Assuming that item ratings are reasonably distributed
- * among users, this should only ignore item-item pairs that are very infrequently co-rated by a user.
- * The intuition is that data on these infrequently co-rated item-item pairs is less reliable and should
- * be the first that is ignored. This parameter can be used to limit the memory requirements of
- * {@link SlopeOneRecommender}, which otherwise grow as the square
- * of the number of items that exist in the {@link DataModel}. Memory requirements can reach gigabytes
- * with only about 10000 items, so this may be necessary on larger datasets.
+ * <p>See {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender} for the meaning of
+ * <code>stdDevWeighted</code>. If <code>compactAverages</code> is set, this uses alternate data structures ({@link
+ * CompactRunningAverage} versus {@link FullRunningAverage}) that use almost 50% less memory but store item-item
+ * averages less accurately. <code>maxEntries</code> controls the maximum number of item-item average preference
+ * differences that will be tracked internally. After the limit is reached, if a new item-item pair is observed in the
+ * data it will be ignored. This is recommended for large datasets. The first <code>maxEntries</code> item-item pairs
+ * observed in the data are tracked. Assuming that item ratings are reasonably distributed among users, this should
+ * only ignore item-item pairs that are very infrequently co-rated by a user. The intuition is that data on these
+ * infrequently co-rated item-item pairs is less reliable and should be the first that is ignored. This parameter can
+ * be used to limit the memory requirements of {@link SlopeOneRecommender}, which otherwise grow as the square of the
+ * number of items that exist in the {@link DataModel}. Memory requirements can reach gigabytes with only about 10000
+ * items, so this may be necessary on larger datasets.
*
- * @param dataModel
- * @param stdDevWeighted see {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}
- * @param compactAverages if <code>true</code>,
- * use {@link CompactRunningAverage} instead of {@link FullRunningAverage} internally
- * @param maxEntries maximum number of item-item average preference differences to track internally
- * @throws IllegalArgumentException if <code>maxEntries</code> is not positive or <code>dataModel</code>
- * is null
+ * @param stdDevWeighted see {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}
+ * @param compactAverages if <code>true</code>, use {@link CompactRunningAverage} instead of {@link
+ * FullRunningAverage} internally
+ * @param maxEntries maximum number of item-item average preference differences to track internally
+ * @throws IllegalArgumentException if <code>maxEntries</code> is not positive or <code>dataModel</code> is null
*/
public MemoryDiffStorage(DataModel dataModel,
Weighting stdDevWeighted,
@@ -142,8 +137,8 @@
return null;
}
return stdDevWeighted ?
- new InvertedRunningAverageAndStdDev((RunningAverageAndStdDev) average) :
- new InvertedRunningAverage(average);
+ new InvertedRunningAverageAndStdDev((RunningAverageAndStdDev) average) :
+ new InvertedRunningAverage(average);
} else {
return average;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java Fri Jul 10 09:35:19 2009
@@ -17,10 +17,10 @@
package org.apache.mahout.cf.taste.impl.recommender.slopeone;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
@@ -42,8 +42,8 @@
/**
* <p>A basic "slope one" recommender. (See an <a href="http://www.daniel-lemire.com/fr/abstracts/SDM2005.html">
- * excellent summary here</a> for example.) This {@link org.apache.mahout.cf.taste.recommender.Recommender} is especially
- * suitable when user preferences are updating frequently as it can incorporate this information without
+ * excellent summary here</a> for example.) This {@link org.apache.mahout.cf.taste.recommender.Recommender} is
+ * especially suitable when user preferences are updating frequently as it can incorporate this information without
* expensive recomputation.</p>
*
* <p>This implementation can also be used as a "weighted slope one" recommender.</p>
@@ -63,23 +63,22 @@
*/
public SlopeOneRecommender(DataModel dataModel) throws TasteException {
this(dataModel,
- Weighting.WEIGHTED,
- Weighting.WEIGHTED,
- new MemoryDiffStorage(dataModel, Weighting.WEIGHTED, false, Long.MAX_VALUE));
+ Weighting.WEIGHTED,
+ Weighting.WEIGHTED,
+ new MemoryDiffStorage(dataModel, Weighting.WEIGHTED, false, Long.MAX_VALUE));
}
/**
* <p>Creates a {@link SlopeOneRecommender} based on the given {@link DataModel}.</p>
*
- * <p>If <code>weighted</code> is set, acts as a weighted slope one recommender.
- * This implementation also includes an experimental "standard deviation" weighting which weights
- * item-item ratings diffs with lower standard deviation more highly, on the theory that they are more
- * reliable.</p>
+ * <p>If <code>weighted</code> is set, acts as a weighted slope one recommender. This implementation also includes an
+ * experimental "standard deviation" weighting which weights item-item ratings diffs with lower standard deviation
+ * more highly, on the theory that they are more reliable.</p>
*
- * @param weighting if {@link Weighting#WEIGHTED}, acts as a weighted slope one recommender
+ * @param weighting if {@link Weighting#WEIGHTED}, acts as a weighted slope one recommender
* @param stdDevWeighting use optional standard deviation weighting of diffs
- * @throws IllegalArgumentException if <code>diffStorage</code> is null, or stdDevWeighted is set
- * when weighted is not set
+ * @throws IllegalArgumentException if <code>diffStorage</code> is null, or stdDevWeighted is set when weighted is not
+ * set
*/
public SlopeOneRecommender(DataModel dataModel,
Weighting weighting,
@@ -99,7 +98,7 @@
@Override
public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
- throws TasteException {
+ throws TasteException {
if (userID == null) {
throw new IllegalArgumentException("userID is null");
}
@@ -205,7 +204,7 @@
@Override
public String toString() {
return "SlopeOneRecommender[weighted:" + weighted + ", stdDevWeighted:" + stdDevWeighted +
- ", diffStorage:" + diffStorage + ']';
+ ", diffStorage:" + diffStorage + ']';
}
private final class Estimator implements TopItems.Estimator<Item> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java Fri Jul 10 09:35:19 2009
@@ -19,10 +19,10 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.impl.common.IOUtils;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.JDBCDataModel;
import org.apache.mahout.cf.taste.model.Preference;
@@ -40,10 +40,10 @@
import java.util.concurrent.Callable;
/**
- * <p>A {@link DiffStorage} which stores diffs in a database. Database-specific implementations subclass
- * this abstract class. Note that this implementation has a fairly particular dependence on the
- * {@link org.apache.mahout.cf.taste.model.DataModel} used; it needs a {@link JDBCDataModel} attached to the same
- * database since its efficient operation depends on accessing preference data in the database directly.</p>
+ * <p>A {@link DiffStorage} which stores diffs in a database. Database-specific implementations subclass this abstract
+ * class. Note that this implementation has a fairly particular dependence on the {@link
+ * org.apache.mahout.cf.taste.model.DataModel} used; it needs a {@link JDBCDataModel} attached to the same database
+ * since its efficient operation depends on accessing preference data in the database directly.</p>
*/
public abstract class AbstractJDBCDiffStorage implements DiffStorage {
@@ -139,7 +139,7 @@
@Override
public RunningAverage[] getDiffs(Object userID, Object itemID, Preference[] prefs)
- throws TasteException {
+ throws TasteException {
int size = prefs.length;
RunningAverage[] result = new RunningAverage[size];
Connection conn = null;
@@ -202,7 +202,7 @@
@Override
public void updateItemPref(Object itemID, double prefDelta, boolean remove)
- throws TasteException {
+ throws TasteException {
Connection conn = null;
try {
conn = dataSource.getConnection();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Fri Jul 10 09:35:19 2009
@@ -18,25 +18,21 @@
package org.apache.mahout.cf.taste.impl.recommender.slopeone.jdbc;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
import org.apache.mahout.cf.taste.impl.model.jdbc.AbstractJDBCDataModel;
+import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
/**
- * <p>MySQL-specific implementation. Should be used in conjunction with a
- * {@link MySQLJDBCDataModel}. This implementation stores item-item diffs in a MySQL
- * database and encapsulates some other slope-one-specific operations that are needed
- * on the preference data in the database. It assumes the database has a schema like:</p>
+ * <p>MySQL-specific implementation. Should be used in conjunction with a {@link MySQLJDBCDataModel}. This
+ * implementation stores item-item diffs in a MySQL database and encapsulates some other slope-one-specific operations
+ * that are needed on the preference data in the database. It assumes the database has a schema like:</p>
*
- * <table>
- * <tr><th>item_id_a</th><th>item_id_b</th><th>average_diff</th><th>count</th></tr>
- * <tr><td>123</td><td>234</td><td>0.5</td><td>5</td></tr>
- * <tr><td>123</td><td>789</td><td>-1.33</td><td>3</td></tr>
- * <tr><td>234</td><td>789</td><td>2.1</td><td>1</td></tr>
- * </table>
+ * <table> <tr><th>item_id_a</th><th>item_id_b</th><th>average_diff</th><th>count</th></tr>
+ * <tr><td>123</td><td>234</td><td>0.5</td><td>5</td></tr> <tr><td>123</td><td>789</td><td>-1.33</td><td>3</td></tr>
+ * <tr><td>234</td><td>789</td><td>2.1</td><td>1</td></tr> </table>
*
- * <p><code>item_id_a</code> and <code>item_id_b</code> must have type compatible with
- * the Java <code>String</code> type. <code>average_diff</code> must be compatible with
- * <code>double</code> and <code>count</code> must be compatible with <code>int</code>.</p>
+ * <p><code>item_id_a</code> and <code>item_id_b</code> must have type compatible with the Java <code>String</code>
+ * type. <code>average_diff</code> must be compatible with <code>double</code> and <code>count</code> must be compatible
+ * with <code>int</code>.</p>
*
* <p>The following command sets up a suitable table in MySQL:</p>
*
@@ -58,12 +54,12 @@
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel) throws TasteException {
this(dataModel,
- DEFAULT_DIFF_TABLE,
- DEFAULT_ITEM_A_COLUMN,
- DEFAULT_ITEM_B_COLUMN,
- DEFAULT_COUNT_COLUMN,
- DEFAULT_AVERAGE_DIFF_COLUMN,
- DEFAULT_MIN_DIFF_COUNT);
+ DEFAULT_DIFF_TABLE,
+ DEFAULT_ITEM_A_COLUMN,
+ DEFAULT_ITEM_B_COLUMN,
+ DEFAULT_COUNT_COLUMN,
+ DEFAULT_AVERAGE_DIFF_COLUMN,
+ DEFAULT_MIN_DIFF_COUNT);
}
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel,
@@ -74,55 +70,55 @@
String avgColumn,
int minDiffCount) throws TasteException {
super(dataModel,
- // getDiffSQL
- "SELECT " + countColumn + ", " + avgColumn + " FROM " + diffsTable +
- " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=? UNION " +
- "SELECT " + countColumn + ", " + avgColumn + " FROM " + diffsTable +
- " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=?",
- // getDiffsSQL
- "SELECT " + countColumn + ", " + avgColumn + ", " + itemIDAColumn + " FROM " + diffsTable + ", " +
- dataModel.getPreferenceTable() + " WHERE " + itemIDBColumn + "=? AND " + itemIDAColumn + " = " + dataModel.getItemIDColumn() +
- " AND " + dataModel.getUserIDColumn() + "=? ORDER BY " + itemIDAColumn,
- // getAverageItemPrefSQL
- "SELECT COUNT(1), AVG(" + dataModel.getPreferenceColumn() + ") FROM " + dataModel.getPreferenceTable() +
- " WHERE " + dataModel.getItemIDColumn() + "=?",
- // updateDiffSQLs
- new String[]{
- "UPDATE " + diffsTable + " SET " + avgColumn + " = " + avgColumn + " - (? / " + countColumn +
- ") WHERE " + itemIDAColumn + "=?",
- "UPDATE " + diffsTable + " SET " + avgColumn + " = " + avgColumn + " + (? / " + countColumn +
- ") WHERE " + itemIDBColumn + "=?"
- },
- // removeDiffSQL
- new String[]{
- "UPDATE " + diffsTable + " SET " + countColumn + " = " + countColumn + "-1, " +
- avgColumn + " = " + avgColumn + " * ((" + countColumn + " + 1) / CAST(" + countColumn +
- " AS DECIMAL)) + ? / CAST(" + countColumn + " AS DECIMAL) WHERE " + itemIDAColumn + "=?",
- "UPDATE " + diffsTable + " SET " + countColumn + " = " + countColumn + "-1, " +
- avgColumn + " = " + avgColumn + " * ((" + countColumn + " + 1) / CAST(" + countColumn +
- " AS DECIMAL)) - ? / CAST(" + countColumn + " AS DECIMAL) WHERE " + itemIDBColumn + "=?"
- },
- // getRecommendableItemsSQL
- "SELECT id FROM " +
- "(SELECT " + itemIDAColumn + " AS id FROM " + diffsTable + ", " + dataModel.getPreferenceTable() +
- " WHERE " + itemIDBColumn + " = " + dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=? UNION DISTINCT" +
- " SELECT " + itemIDBColumn + " AS id FROM " + diffsTable + ", " + dataModel.getPreferenceTable() +
- " WHERE " + itemIDAColumn + " = " + dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() +
- "=?) possible_item_ids WHERE id NOT IN (SELECT " + dataModel.getItemIDColumn() + " FROM " + dataModel.getPreferenceTable() +
- " WHERE " + dataModel.getUserIDColumn() + "=?)",
- // deleteDiffsSQL
- "DELETE FROM " + diffsTable,
- // createDiffsSQL
- "INSERT INTO " + diffsTable + " (" + itemIDAColumn + ", " + itemIDBColumn + ", " + avgColumn +
- ", " + countColumn + ") SELECT prefsA." + dataModel.getItemIDColumn() + ", prefsB." + dataModel.getItemIDColumn() + ',' +
- " AVG(prefsB." + dataModel.getPreferenceColumn() + " - prefsA." + dataModel.getPreferenceColumn() + ")," +
- " COUNT(1) AS count FROM " + dataModel.getPreferenceTable() + " prefsA, " + dataModel.getPreferenceTable() + " prefsB WHERE prefsA." +
- dataModel.getUserIDColumn() + " = prefsB." + dataModel.getUserIDColumn() + " AND prefsA." + dataModel.getItemIDColumn() + " < prefsB." +
- dataModel.getItemIDColumn() + ' ' + " GROUP BY prefsA." + dataModel.getItemIDColumn() +
- ", prefsB." + dataModel.getItemIDColumn() + " HAVING count >=?",
- // diffsExistSQL
- "SELECT COUNT(1) FROM " + diffsTable,
- minDiffCount);
+ // getDiffSQL
+ "SELECT " + countColumn + ", " + avgColumn + " FROM " + diffsTable +
+ " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=? UNION " +
+ "SELECT " + countColumn + ", " + avgColumn + " FROM " + diffsTable +
+ " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=?",
+ // getDiffsSQL
+ "SELECT " + countColumn + ", " + avgColumn + ", " + itemIDAColumn + " FROM " + diffsTable + ", " +
+ dataModel.getPreferenceTable() + " WHERE " + itemIDBColumn + "=? AND " + itemIDAColumn + " = " + dataModel.getItemIDColumn() +
+ " AND " + dataModel.getUserIDColumn() + "=? ORDER BY " + itemIDAColumn,
+ // getAverageItemPrefSQL
+ "SELECT COUNT(1), AVG(" + dataModel.getPreferenceColumn() + ") FROM " + dataModel.getPreferenceTable() +
+ " WHERE " + dataModel.getItemIDColumn() + "=?",
+ // updateDiffSQLs
+ new String[]{
+ "UPDATE " + diffsTable + " SET " + avgColumn + " = " + avgColumn + " - (? / " + countColumn +
+ ") WHERE " + itemIDAColumn + "=?",
+ "UPDATE " + diffsTable + " SET " + avgColumn + " = " + avgColumn + " + (? / " + countColumn +
+ ") WHERE " + itemIDBColumn + "=?"
+ },
+ // removeDiffSQL
+ new String[]{
+ "UPDATE " + diffsTable + " SET " + countColumn + " = " + countColumn + "-1, " +
+ avgColumn + " = " + avgColumn + " * ((" + countColumn + " + 1) / CAST(" + countColumn +
+ " AS DECIMAL)) + ? / CAST(" + countColumn + " AS DECIMAL) WHERE " + itemIDAColumn + "=?",
+ "UPDATE " + diffsTable + " SET " + countColumn + " = " + countColumn + "-1, " +
+ avgColumn + " = " + avgColumn + " * ((" + countColumn + " + 1) / CAST(" + countColumn +
+ " AS DECIMAL)) - ? / CAST(" + countColumn + " AS DECIMAL) WHERE " + itemIDBColumn + "=?"
+ },
+ // getRecommendableItemsSQL
+ "SELECT id FROM " +
+ "(SELECT " + itemIDAColumn + " AS id FROM " + diffsTable + ", " + dataModel.getPreferenceTable() +
+ " WHERE " + itemIDBColumn + " = " + dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=? UNION DISTINCT" +
+ " SELECT " + itemIDBColumn + " AS id FROM " + diffsTable + ", " + dataModel.getPreferenceTable() +
+ " WHERE " + itemIDAColumn + " = " + dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() +
+ "=?) possible_item_ids WHERE id NOT IN (SELECT " + dataModel.getItemIDColumn() + " FROM " + dataModel.getPreferenceTable() +
+ " WHERE " + dataModel.getUserIDColumn() + "=?)",
+ // deleteDiffsSQL
+ "DELETE FROM " + diffsTable,
+ // createDiffsSQL
+ "INSERT INTO " + diffsTable + " (" + itemIDAColumn + ", " + itemIDBColumn + ", " + avgColumn +
+ ", " + countColumn + ") SELECT prefsA." + dataModel.getItemIDColumn() + ", prefsB." + dataModel.getItemIDColumn() + ',' +
+ " AVG(prefsB." + dataModel.getPreferenceColumn() + " - prefsA." + dataModel.getPreferenceColumn() + ")," +
+ " COUNT(1) AS count FROM " + dataModel.getPreferenceTable() + " prefsA, " + dataModel.getPreferenceTable() + " prefsB WHERE prefsA." +
+ dataModel.getUserIDColumn() + " = prefsB." + dataModel.getUserIDColumn() + " AND prefsA." + dataModel.getItemIDColumn() + " < prefsB." +
+ dataModel.getItemIDColumn() + ' ' + " GROUP BY prefsA." + dataModel.getItemIDColumn() +
+ ", prefsB." + dataModel.getItemIDColumn() + " HAVING count >=?",
+ // diffsExistSQL
+ "SELECT COUNT(1) FROM " + diffsTable,
+ minDiffCount);
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVD.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVD.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVD.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVD.java Fri Jul 10 09:35:19 2009
@@ -21,9 +21,7 @@
import java.util.Random;
-/**
- * Calculates the SVD using an Expectation Maximization algorithm.
- */
+/** Calculates the SVD using an Expectation Maximization algorithm. */
public final class ExpectationMaximizationSVD {
private static final Random random = RandomUtils.getRandom();
@@ -98,6 +96,6 @@
int getK() {
return k;
- }
+ }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Fri Jul 10 09:35:19 2009
@@ -17,15 +17,6 @@
package org.apache.mahout.cf.taste.impl.recommender.svd;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.Callable;
-
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
@@ -48,9 +39,17 @@
import org.slf4j.LoggerFactory;
import org.uncommons.maths.statistics.DataSet;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
/**
- * <p>A {@link Recommender} which uses Single Value Decomposition to
- * find the main features of the {@link DataSet}.
+ * <p>A {@link Recommender} which uses Singular Value Decomposition to find the main features of the {@link DataSet}.
* Thanks to Simon Funk for the hints in the implementation.
*/
public final class SVDRecommender extends AbstractRecommender {
@@ -69,19 +68,15 @@
private final List<Preference> cachedPreferences;
/**
- * @param dataModel
* @param numFeatures the number of features
* @param initialSteps number of initial training steps
*/
- public SVDRecommender(DataModel dataModel, int numFeatures, int initialSteps) throws TasteException{
+ public SVDRecommender(DataModel dataModel, int numFeatures, int initialSteps) throws TasteException {
this(dataModel, numFeatures);
train(initialSteps);
}
- /**
- * @param dataModel
- * @param numFeatures the number of features
- */
+ /** @param numFeatures the number of features */
public SVDRecommender(DataModel dataModel, int numFeatures) throws TasteException {
super(dataModel);
@@ -119,7 +114,7 @@
}
});
refreshHelper.addDependency(dataModel);
-
+
}
private void recachePreferences() throws TasteException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Fri Jul 10 09:35:19 2009
@@ -20,23 +20,21 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.SimilarityTransform;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.transforms.PreferenceTransform;
+import org.apache.mahout.cf.taste.transforms.SimilarityTransform;
+
import java.util.Collection;
import java.util.concurrent.Callable;
-/**
- * Abstract superclass encapsulating functionality that is common to most
- * implementations in this package.
- */
+/** Abstract superclass encapsulating functionality that is common to most implementations in this package. */
abstract class AbstractSimilarity implements UserSimilarity, ItemSimilarity {
private final DataModel dataModel;
@@ -48,16 +46,12 @@
private int cachedNumUsers;
private final RefreshHelper refreshHelper;
- /**
- * <p>Creates a normal (unweighted) {@link AbstractSimilarity}.</p>
- */
+ /** <p>Creates a normal (unweighted) {@link AbstractSimilarity}.</p> */
AbstractSimilarity(DataModel dataModel) throws TasteException {
this(dataModel, Weighting.UNWEIGHTED);
}
- /**
- * <p>Creates a possibly weighted {@link AbstractSimilarity}.</p>
- */
+ /** <p>Creates a possibly weighted {@link AbstractSimilarity}.</p> */
AbstractSimilarity(final DataModel dataModel, Weighting weighting) throws TasteException {
if (dataModel == null) {
throw new IllegalArgumentException("dataModel is null");
@@ -120,21 +114,21 @@
}
/**
- * <p>Several subclasses in this package implement this method to actually compute the similarity
- * from figures computed over users or items. Note that the computations in this class "center" the
- * data, such that X and Y's mean are 0.</p>
+ * <p>Several subclasses in this package implement this method to actually compute the similarity from figures
+ * computed over users or items. Note that the computations in this class "center" the data, such that X and Y's mean
+ * are 0.</p>
*
- * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
- * the standard similarity computations as a result.</p>
+ * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into the standard
+ * similarity computations as a result.</p>
*
- * @param n total number of users or items
- * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
- * both users/items
- * @param sumX2 sum of the square of user/item preference values, over the first item/user
- * @param sumY2 sum of the square of the user/item preference values, over the second item/user
+ * @param n total number of users or items
+ * @param sumXY sum of product of user/item preference values, over all items/users preferred by both
+ * users/items
+ * @param sumX2 sum of the square of user/item preference values, over the first item/user
+ * @param sumY2 sum of the square of the user/item preference values, over the second item/user
* @param sumXYdiff2 sum of squares of differences in X and Y values
- * @return similarity value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no similarity
- * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
+ * @return similarity value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no similarity can be computed
+ * (e.g. when no {@link Item}s have been rated by both {@link User}s)
*/
abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java Fri Jul 10 09:35:19 2009
@@ -19,7 +19,6 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.impl.common.Cache;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.Retriever;
@@ -28,13 +27,14 @@
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import java.util.Collection;
/**
- * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item}
- * that the user has not expressed any preference for. This might be an average of other preferences scores
- * from that user, for example. This technique is sometimes called "default voting".</p>
+ * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item} that the
+ * user has not expressed any preference for. This might be an average of other preference scores from that user, for
+ * example. This technique is sometimes called "default voting".</p>
*/
public final class AveragingPreferenceInferrer implements PreferenceInferrer {
@@ -62,6 +62,7 @@
private static final class PrefRetriever implements Retriever<User, Double> {
private static final Double ZERO = 0.0;
+
@Override
public Double get(User key) {
RunningAverage average = new FullRunningAverage();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanLogLikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanLogLikelihoodSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanLogLikelihoodSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanLogLikelihoodSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,13 +19,13 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
-import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
@@ -52,11 +52,11 @@
int prefs1Size = prefs1.size();
int prefs2Size = prefs2.size();
int intersectionSize = prefs1Size < prefs2Size ?
- prefs2.intersectionSize(prefs1) :
- prefs1.intersectionSize(prefs2);
+ prefs2.intersectionSize(prefs1) :
+ prefs1.intersectionSize(prefs2);
int numItems = dataModel.getNumItems();
double logLikelihood =
- LogLikelihoodSimilarity.twoLogLambda(intersectionSize, prefs1.size() - intersectionSize, prefs2.size(), numItems - prefs2.size());
+ LogLikelihoodSimilarity.twoLogLambda(intersectionSize, prefs1.size() - intersectionSize, prefs2.size(), numItems - prefs2.size());
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java Fri Jul 10 09:35:19 2009
@@ -18,23 +18,22 @@
package org.apache.mahout.cf.taste.impl.similarity;
import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
-import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
/**
- * <p>Variant of {@link TanimotoCoefficientSimilarity} which is appropriate
- * for use with the "boolean" classes like {@link BooleanPrefUser}.</p>
+ * <p>Variant of {@link TanimotoCoefficientSimilarity} which is appropriate for use with the "boolean" classes like
+ * {@link BooleanPrefUser}.</p>
*
- * <p>If you need an {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity},
- * just use {@link org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity},
- * even with "boolean" classes.</p>
+ * <p>If you need an {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity}, just use {@link
+ * org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity}, even with "boolean" classes.</p>
*/
public final class BooleanTanimotoCoefficientSimilarity implements UserSimilarity {
@@ -59,8 +58,8 @@
int prefs1Size = prefs1.size();
int prefs2Size = prefs2.size();
int intersectionSize = prefs1Size < prefs2Size ?
- prefs2.intersectionSize(prefs1) :
- prefs1.intersectionSize(prefs2);
+ prefs2.intersectionSize(prefs1) :
+ prefs1.intersectionSize(prefs2);
int unionSize = prefs1Size + prefs2Size - intersectionSize;
return (double) intersectionSize / (double) unionSize;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,19 +19,17 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.impl.common.Cache;
import org.apache.mahout.cf.taste.impl.common.Pair;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import java.util.Collection;
-/**
- * Caches the results from an underlying {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} implementation.
- */
+/** Caches the results from an underlying {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} implementation. */
public final class CachingItemSimilarity implements ItemSimilarity {
private final ItemSimilarity similarity;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,20 +19,18 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.Cache;
import org.apache.mahout.cf.taste.impl.common.Pair;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
-/**
- * Caches the results from an underlying {@link org.apache.mahout.cf.taste.similarity.UserSimilarity} implementation.
- */
+/** Caches the results from an underlying {@link org.apache.mahout.cf.taste.similarity.UserSimilarity} implementation. */
public final class CachingUserSimilarity implements UserSimilarity {
private final UserSimilarity similarity;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Fri Jul 10 09:35:19 2009
@@ -22,13 +22,12 @@
import org.apache.mahout.cf.taste.model.DataModel;
/**
- * <p>An implementation of a "similarity" based on the Euclidean "distance" between two
- * {@link org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions
- * and preferences as points along those dimensions, a distance is computed using all
- * items (dimensions) where both users have expressed a preference for that item. This
- * is simply the square root of the sum of the squares of differences in position (preference)
- * along each dimension. The similarity is then computed as 1 / (1 + distance), so the
- * resulting values are in the range (0,1].</p>
+ * <p>An implementation of a "similarity" based on the Euclidean "distance" between two {@link
+ * org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions and preferences as points along
+ * those dimensions, a distance is computed using all items (dimensions) where both users have expressed a preference
+ * for that item. This is simply the square root of the sum of the squares of differences in position (preference) along
+ * each dimension. The similarity is then computed as 1 / (1 + distance), so the resulting values are in the range
+ * (0,1].</p>
*/
public final class EuclideanDistanceSimilarity extends AbstractSimilarity {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,7 +19,6 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.impl.common.FastMap;
import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
import org.apache.mahout.cf.taste.impl.common.IteratorUtils;
@@ -27,6 +26,7 @@
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import java.util.Collection;
import java.util.Iterator;
@@ -35,48 +35,42 @@
import java.util.NoSuchElementException;
/**
- * <p>A "generic" {@link ItemSimilarity} which takes a static list of precomputed {@link Item}
- * similarities and bases its responses on that alone. The values may have been precomputed
- * offline by another process, stored in a file, and then read and fed into an instance of this class.</p>
+ * <p>A "generic" {@link ItemSimilarity} which takes a static list of precomputed {@link Item} similarities and bases
+ * its responses on that alone. The values may have been precomputed offline by another process, stored in a file, and
+ * then read and fed into an instance of this class.</p>
*
- * <p>This is perhaps the best {@link ItemSimilarity} to use with
- * {@link org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender}, for now, since the point of item-based
- * recommenders is that they can take advantage of the fact that item similarity is relatively static,
- * can be precomputed, and then used in computation to gain a significant performance advantage.</p>
+ * <p>This is perhaps the best {@link ItemSimilarity} to use with {@link org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender},
+ * for now, since the point of item-based recommenders is that they can take advantage of the fact that item similarity
+ * is relatively static, can be precomputed, and then used in computation to gain a significant performance
+ * advantage.</p>
*/
public final class GenericItemSimilarity implements ItemSimilarity {
private final Map<Item, Map<Item, Double>> similarityMaps = new FastMap<Item, Map<Item, Double>>();
/**
- * <p>Creates a {@link GenericItemSimilarity} from a precomputed list of
- * {@link ItemItemSimilarity}s. Each
- * represents the similarity between two distinct items. Since similarity is assumed to be symmetric,
- * it is not necessary to specify similarity between item1 and item2, and item2 and item1. Both are the same.
- * It is also not necessary to specify a similarity between any item and itself; these are assumed to be 1.0.</p>
+ * <p>Creates a {@link GenericItemSimilarity} from a precomputed list of {@link ItemItemSimilarity}s. Each represents
+ * the similarity between two distinct items. Since similarity is assumed to be symmetric, it is not necessary to
+ * specify similarity between item1 and item2, and item2 and item1. Both are the same. It is also not necessary to
+ * specify a similarity between any item and itself; these are assumed to be 1.0.</p>
*
- * <p>Note that specifying a similarity between two items twice is not an error, but, the later value will
- * win.</p>
+ * <p>Note that specifying a similarity between two items twice is not an error, but the later value will win.</p>
*
- * @param similarities set of
- * {@link ItemItemSimilarity}s
- * on which to base this instance
+ * @param similarities set of {@link ItemItemSimilarity}s on which to base this instance
*/
public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities) {
initSimilarityMaps(similarities);
}
/**
- * <p>Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of similarities
- * from the given {@link Iterable} of similarities. It will keep those with the highest similarity --
- * those that are therefore most important.</p>
+ * <p>Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of similarities from the
+ * given {@link Iterable} of similarities. It will keep those with the highest similarity -- those that are therefore
+ * most important.</p>
*
* <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
*
- * @param similarities set of
- * {@link ItemItemSimilarity}s
- * on which to base this instance
- * @param maxToKeep maximum number of similarities to keep
+ * @param similarities set of {@link ItemItemSimilarity}s on which to base this instance
+ * @param maxToKeep maximum number of similarities to keep
*/
public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities, int maxToKeep) {
Iterable<ItemItemSimilarity> keptSimilarities = TopItems.getTopItemItemSimilarities(maxToKeep, similarities);
@@ -84,17 +78,16 @@
}
/**
- * <p>Builds a list of item-item similarities given an {@link ItemSimilarity} implementation and a
- * {@link DataModel}, rather than a list of
- * {@link ItemItemSimilarity}s.</p>
+ * <p>Builds a list of item-item similarities given an {@link ItemSimilarity} implementation and a {@link DataModel},
+ * rather than a list of {@link ItemItemSimilarity}s.</p>
*
- * <p>It's valid to build a {@link GenericItemSimilarity} this way, but perhaps missing some of the point
- * of an item-based recommender. Item-based recommenders use the assumption that item-item similarities
- * are relatively fixed, and might be known already independent of user preferences. Hence it is useful
- * to inject that information, using {@link #GenericItemSimilarity(Iterable)}.</p>
+ * <p>It's valid to build a {@link GenericItemSimilarity} this way, but perhaps missing some of the point of an
+ * item-based recommender. Item-based recommenders use the assumption that item-item similarities are relatively
+ * fixed, and might be known already independent of user preferences. Hence it is useful to inject that information,
+ * using {@link #GenericItemSimilarity(Iterable)}.</p>
*
* @param otherSimilarity other {@link ItemSimilarity} to get similarities from
- * @param dataModel data model to get {@link Item}s from
+ * @param dataModel data model to get {@link Item}s from
* @throws TasteException if an error occurs while accessing the {@link DataModel} items
*/
public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
@@ -104,23 +97,23 @@
}
/**
- * <p>Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)} )}, but will only
- * keep the specified number of similarities from the given {@link DataModel}.
- * It will keep those with the highest similarity -- those that are therefore most important.</p>
+ * <p>Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)}, but will only keep the specified number of
+ * similarities from the given {@link DataModel}. It will keep those with the highest similarity -- those that are
+ * therefore most important.</p>
*
* <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
*
* @param otherSimilarity other {@link ItemSimilarity} to get similarities from
- * @param dataModel data model to get {@link Item}s from
- * @param maxToKeep maximum number of similarities to keep
+ * @param dataModel data model to get {@link Item}s from
+ * @param maxToKeep maximum number of similarities to keep
* @throws TasteException if an error occurs while accessing the {@link DataModel} items
*/
public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
- throws TasteException {
+ throws TasteException {
List<? extends Item> items = IteratorUtils.iterableToList(dataModel.getItems());
Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, items);
Iterable<ItemItemSimilarity> keptSimilarities =
- TopItems.getTopItemItemSimilarities(maxToKeep, new IteratorIterable<ItemItemSimilarity>(it));
+ TopItems.getTopItemItemSimilarities(maxToKeep, new IteratorIterable<ItemItemSimilarity>(it));
initSimilarityMaps(keptSimilarities);
}
@@ -153,8 +146,8 @@
/**
* <p>Returns the similarity between two items. Note that similarity is assumed to be symmetric, that
- * <code>itemSimilarity(item1, item2) == itemSimilarity(item2, item1)</code>, and that
- * <code>itemSimilarity(item1, item1) == 1.0</code> for all items.</p>
+ * <code>itemSimilarity(item1, item2) == itemSimilarity(item2, item1)</code>, and that <code>itemSimilarity(item1,
+ * item1) == 1.0</code> for all items.</p>
*
* @param item1 first item
* @param item2 second item
@@ -188,9 +181,7 @@
// Do nothing
}
- /**
- * Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0].
- */
+ /** Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0]. */
public static final class ItemItemSimilarity implements Comparable<ItemItemSimilarity> {
private final Item item1;
@@ -232,9 +223,7 @@
return "ItemItemSimilarity[" + item1 + ',' + item2 + ':' + value + ']';
}
- /**
- * Defines an ordering from highest similarity to lowest.
- */
+ /** Defines an ordering from highest similarity to lowest. */
@Override
public int compareTo(ItemItemSimilarity other) {
double otherValue = other.value;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,8 +19,6 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.impl.common.FastMap;
import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
import org.apache.mahout.cf.taste.impl.common.IteratorUtils;
@@ -28,6 +26,8 @@
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
import java.util.Iterator;
@@ -55,11 +55,11 @@
}
public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
- throws TasteException {
+ throws TasteException {
List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
Iterable<UserUserSimilarity> keptSimilarities =
- TopItems.getTopUserUserSimilarities(maxToKeep, new IteratorIterable<UserUserSimilarity>(it));
+ TopItems.getTopUserUserSimilarities(maxToKeep, new IteratorIterable<UserUserSimilarity>(it));
initSimilarityMaps(keptSimilarities);
}
@@ -158,9 +158,7 @@
return "UserUserSimilarity[" + user1 + ',' + user2 + ':' + value + ']';
}
- /**
- * Defines an ordering from highest similarity to lowest.
- */
+ /** Defines an ordering from highest similarity to lowest. */
@Override
public int compareTo(UserUserSimilarity other) {
double otherValue = other.value;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,20 +19,18 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
-/**
- * See <a href="http://citeseer.ist.psu.edu/29096.html">http://citeseer.ist.psu.edu/29096.html</a>.
- */
+/** See <a href="http://citeseer.ist.psu.edu/29096.html">http://citeseer.ist.psu.edu/29096.html</a>. */
public final class LogLikelihoodSimilarity implements UserSimilarity, ItemSimilarity {
private final DataModel dataModel;
@@ -69,7 +67,7 @@
int numItems = dataModel.getNumItems();
double logLikelihood =
- twoLogLambda(intersectionSize, xPrefs.length - intersectionSize, yPrefs.length, numItems - yPrefs.length);
+ twoLogLambda(intersectionSize, xPrefs.length - intersectionSize, yPrefs.length, numItems - yPrefs.length);
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
@@ -115,7 +113,7 @@
int preferring2 = dataModel.getNumUsersWithPreferenceFor(item2.getID());
int numUsers = dataModel.getNumUsers();
double logLikelihood =
- twoLogLambda(preferring1and2, preferring1 - preferring1and2, preferring2, numUsers - preferring2);
+ twoLogLambda(preferring1and2, preferring1 - preferring1and2, preferring2, numUsers - preferring2);
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Fri Jul 10 09:35:19 2009
@@ -23,26 +23,22 @@
import org.apache.mahout.cf.taste.model.User;
/**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
+ * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values are calculated:</p>
*
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
+ * <ul> <li>sumX2: sum of the square of all X's preference values</li> <li>sumY2: sum of the square of all Y's
+ * preference values</li> <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and
+ * Y express a preference</li> </ul>
*
* <p>The correlation is then:
*
* <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
*
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
+ * <p>Note that this correlation "centers" its data, shifts the user's preference values so that each of their means is
+ * 0. This is necessary to achieve expected behavior on all data sets.</p>
*
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it receives is
+ * assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle between the two
+ * vectors defined by the users' preference values.</p>
*/
public final class PearsonCorrelationSimilarity extends AbstractSimilarity {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,25 +19,25 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.model.ByItemPreferenceComparator;
import org.apache.mahout.cf.taste.impl.model.ByValuePreferenceComparator;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Arrays;
import java.util.Collection;
/**
- * <p>Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of preference
- * values themselves. That is, each {@link User}'s preferences are sorted and then assign a rank as their preference
- * value, with 1 being assigned to the least preferred item. Then the Pearson correlation of these rank values is
- * computed.</p>
+ * <p>Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of
+ * preference values themselves. That is, each {@link User}'s preferences are sorted and then assigned a rank as their
+ * preference value, with 1 being assigned to the least preferred item. Then the Pearson correlation of these rank
+ * values is computed.</p>
*/
public final class SpearmanCorrelationSimilarity implements UserSimilarity {
@@ -63,7 +63,7 @@
throw new IllegalArgumentException("user1 or user2 is null");
}
return rankingUserSimilarity.userSimilarity(new RankedPreferenceUser(user1),
- new RankedPreferenceUser(user2));
+ new RankedPreferenceUser(user2));
}
@Override
@@ -79,8 +79,8 @@
/**
- * <p>A simple {@link User} decorator which will always return the underlying {@link User}'s
- * preferences in order by value.</p>
+ * <p>A simple {@link User} decorator which will always return the underlying {@link User}'s preferences in order by
+ * value.</p>
*/
private static final class RankedPreferenceUser implements User {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java Fri Jul 10 09:35:19 2009
@@ -19,26 +19,24 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
-import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.util.Collection;
/**
- * <p>An implementation of a "similarity" based on the
- * <a href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">
- * Tanimoto coefficient</a>,
- * or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard coefficient</a>.</p>
+ * <p>An implementation of a "similarity" based on the <a href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">
+ * Tanimoto coefficient</a>, or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard
+ * coefficient</a>.</p>
*
- * <p>This is intended for "binary" data sets where a user either expresses a generic "yes" preference
- * for an item or has no preference. The actual preference values do not matter here, only their presence
- * or absence.</p>
+ * <p>This is intended for "binary" data sets where a user either expresses a generic "yes" preference for an item or
+ * has no preference. The actual preference values do not matter here, only their presence or absence.</p>
*
* <p>The value returned is in [0,1].</p>
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java Fri Jul 10 09:35:19 2009
@@ -23,9 +23,8 @@
import java.util.Collection;
/**
- * <p>Applies "case amplification" to similarities. This essentially makes big values bigger
- * and small values smaller by raising each score to a power. It could however be used to achieve the
- * opposite effect.</p>
+ * <p>Applies "case amplification" to similarities. This essentially makes big values bigger and small values smaller by
+ * raising each score to a power. It could however be used to achieve the opposite effect.</p>
*/
public final class CaseAmplification implements SimilarityTransform<Object> {
@@ -45,14 +44,14 @@
}
/**
- * <p>Transforms one similarity value. This implementation is such that it's possible to define this
- * transformation on one value in isolation. The "thing" parameters are therefore unused.</p>
+ * <p>Transforms one similarity value. This implementation is such that it's possible to define this transformation on
+ * one value in isolation. The "thing" parameters are therefore unused.</p>
*
* @param thing1 unused
* @param thing2 unused
- * @param value similarity to transform
- * @return <code>value<sup>factor</sup></code> if value is nonnegative;
- * <code>-value<sup>-factor</sup></code> otherwise
+ * @param value similarity to transform
+ * @return <code>value<sup>factor</sup></code> if value is nonnegative; <code>-value<sup>-factor</sup></code>
+ * otherwise
*/
@Override
public double transformSimilarity(Object thing1, Object thing2, double value) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/Counters.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/Counters.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/Counters.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/Counters.java Fri Jul 10 09:35:19 2009
@@ -21,9 +21,7 @@
import java.util.Map;
-/**
- * <p>A simple, fast utility class that maps keys to counts.</p>
- */
+/** <p>A simple, fast utility class that maps keys to counts.</p> */
final class Counters<T> {
private final Map<T, int[]> counts = new FastMap<T, int[]>();