You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/04 02:06:50 UTC
svn commit: r800634 [4/7] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/impl/common/
core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/
core/src/main/java/org...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Tue Aug 4 00:06:46 2009
@@ -27,8 +27,7 @@
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -45,17 +44,17 @@
import java.util.concurrent.locks.ReentrantLock;
/**
- * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link User}s, then determines the
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters users, then determines the
* clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters until only a
* certain number remain, meaning that each cluster is sort of a tree of other clusters.</p>
*
* <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p>
*
* <ul>
- * <li>For all {@link User}s in a cluster, recommendations will be the same</li>
+ * <li>For all users in a cluster, recommendations will be the same</li>
* <li>{@link #estimatePreference(Comparable, Comparable)} may well return {@link Double#NaN};
* it does so when asked to estimate preference for an item for
- * which no preference is expressed in the {@link User}s in the cluster.</li>
+ * which no preference is expressed in the users in the cluster.</li>
* </ul>
*/
public final class TreeClusteringRecommender extends AbstractRecommender implements ClusteringRecommender {
@@ -70,14 +69,14 @@
private final boolean clusteringByThreshold;
private final double samplingRate;
private Map<Comparable<?>, List<RecommendedItem>> topRecsByUserID;
- private Collection<Collection<User>> allClusters;
- private Map<Comparable<?>, Collection<User>> clustersByUserID;
+ private Collection<Collection<Comparable<?>>> allClusters;
+ private Map<Comparable<?>, Collection<Comparable<?>>> clustersByUserID;
private boolean clustersBuilt;
private final ReentrantLock buildClustersLock;
private final RefreshHelper refreshHelper;
/**
- * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param dataModel {@link DataModel} which provides users
* @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
* @param numClusters desired number of clusters to create
* @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
@@ -89,7 +88,7 @@
}
/**
- * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param dataModel {@link DataModel} which provides users
* @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
* @param numClusters desired number of clusters to create
* @param samplingRate percentage of all cluster-cluster pairs to consider when finding next-most-similar
@@ -129,7 +128,7 @@
}
/**
- * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param dataModel {@link DataModel} which provides users
* @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
* @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
* the next two nearest clusters' similarity drops below this threshold
@@ -143,7 +142,7 @@
}
/**
- * @param dataModel {@link DataModel} which provides {@link User}s
+ * @param dataModel {@link DataModel} which provides users
* @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
* @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
* the next two nearest clusters' similarity drops below this threshold
@@ -203,7 +202,7 @@
return Collections.emptyList();
}
- User theUser = getDataModel().getUser(userID);
+ DataModel dataModel = getDataModel();
List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
// Only add items the user doesn't already have a preference for.
// And that the rescorer doesn't "reject".
@@ -212,7 +211,7 @@
if (rescorer != null && rescorer.isFiltered(itemID)) {
continue;
}
- if (theUser.getPreferenceFor(itemID) == null &&
+ if (dataModel.getPreferenceValue(userID, itemID) == null &&
(rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
rescored.add(recommendedItem);
}
@@ -223,15 +222,14 @@
}
@Override
- public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+ public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
if (userID == null || itemID == null) {
throw new IllegalArgumentException("userID or itemID is null");
}
DataModel model = getDataModel();
- User theUser = model.getUser(userID);
- Preference actualPref = theUser.getPreferenceFor(itemID);
+ Float actualPref = model.getPreferenceValue(userID, itemID);
if (actualPref != null) {
- return actualPref.getValue();
+ return actualPref;
}
checkClustersBuilt();
List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
@@ -243,21 +241,21 @@
}
}
// Hmm, we have no idea. The item is not in the user's cluster
- return Double.NaN;
+ return Float.NaN;
}
@Override
- public Collection<User> getCluster(Comparable<?> userID) throws TasteException {
+ public Collection<Comparable<?>> getCluster(Comparable<?> userID) throws TasteException {
if (userID == null) {
throw new IllegalArgumentException("userID is null");
}
checkClustersBuilt();
- Collection<User> cluster = clustersByUserID.get(userID);
- return cluster == null ? Collections.<User>emptyList() : cluster;
+ Collection<Comparable<?>> cluster = clustersByUserID.get(userID);
+ return cluster == null ? Collections.<Comparable<?>>emptyList() : cluster;
}
@Override
- public Collection<Collection<User>> getClusters() throws TasteException {
+ public Collection<Collection<Comparable<?>>> getClusters() throws TasteException {
checkClustersBuilt();
return allClusters;
}
@@ -274,17 +272,14 @@
DataModel model = getDataModel();
int numUsers = model.getNumUsers();
if (numUsers > 0) {
- List<Collection<User>> newClusters = new ArrayList<Collection<User>>(numUsers);
- if (numUsers == 1) {
- User onlyUser = model.getUsers().iterator().next();
- newClusters.add(Collections.singleton(onlyUser));
- } else {
- // Begin with a cluster for each user:
- for (User user : model.getUsers()) {
- Collection<User> newCluster = new FastSet<User>();
- newCluster.add(user);
- newClusters.add(newCluster);
- }
+ List<Collection<Comparable<?>>> newClusters = new ArrayList<Collection<Comparable<?>>>(numUsers);
+ // Begin with a cluster for each user:
+ for (Comparable<?> userID : model.getUserIDs()) {
+ Collection<Comparable<?>> newCluster = new FastSet<Comparable<?>>();
+ newCluster.add(userID);
+ newClusters.add(newCluster);
+ }
+ if (numUsers > 1) {
findClusters(newClusters);
}
topRecsByUserID = computeTopRecsPerUserID(newClusters);
@@ -301,16 +296,16 @@
}
}
- private void findClusters(List<Collection<User>> newClusters) throws TasteException {
+ private void findClusters(List<Collection<Comparable<?>>> newClusters) throws TasteException {
if (clusteringByThreshold) {
- Pair<Collection<User>, Collection<User>> nearestPair = findNearestClusters(newClusters);
+ Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair = findNearestClusters(newClusters);
if (nearestPair != null) {
- Collection<User> cluster1 = nearestPair.getFirst();
- Collection<User> cluster2 = nearestPair.getSecond();
+ Collection<Comparable<?>> cluster1 = nearestPair.getFirst();
+ Collection<Comparable<?>> cluster2 = nearestPair.getSecond();
while (clusterSimilarity.getSimilarity(cluster1, cluster2) >= clusteringThreshold) {
newClusters.remove(cluster1);
newClusters.remove(cluster2);
- Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+ Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
merged.addAll(cluster1);
merged.addAll(cluster2);
newClusters.add(merged);
@@ -324,16 +319,16 @@
}
} else {
while (newClusters.size() > numClusters) {
- Pair<Collection<User>, Collection<User>> nearestPair =
+ Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair =
findNearestClusters(newClusters);
if (nearestPair == null) {
break;
}
- Collection<User> cluster1 = nearestPair.getFirst();
- Collection<User> cluster2 = nearestPair.getSecond();
+ Collection<Comparable<?>> cluster1 = nearestPair.getFirst();
+ Collection<Comparable<?>> cluster2 = nearestPair.getSecond();
newClusters.remove(cluster1);
newClusters.remove(cluster2);
- Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+ Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
merged.addAll(cluster1);
merged.addAll(cluster2);
newClusters.add(merged);
@@ -341,20 +336,20 @@
}
}
- private Pair<Collection<User>, Collection<User>> findNearestClusters(List<Collection<User>> clusters)
- throws TasteException {
+ private Pair<Collection<Comparable<?>>, Collection<Comparable<?>>>
+ findNearestClusters(List<Collection<Comparable<?>>> clusters) throws TasteException {
int size = clusters.size();
- Pair<Collection<User>, Collection<User>> nearestPair = null;
+ Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair = null;
double bestSimilarity = Double.NEGATIVE_INFINITY;
for (int i = 0; i < size; i++) {
- Collection<User> cluster1 = clusters.get(i);
+ Collection<Comparable<?>> cluster1 = clusters.get(i);
for (int j = i + 1; j < size; j++) {
if (samplingRate >= 1.0 || r.nextDouble() < samplingRate) {
- Collection<User> cluster2 = clusters.get(j);
+ Collection<Comparable<?>> cluster2 = clusters.get(j);
double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
if (!Double.isNaN(similarity) && similarity > bestSimilarity) {
bestSimilarity = similarity;
- nearestPair = new Pair<Collection<User>, Collection<User>>(cluster1, cluster2);
+ nearestPair = new Pair<Collection<Comparable<?>>, Collection<Comparable<?>>>(cluster1, cluster2);
}
}
}
@@ -362,26 +357,27 @@
return nearestPair;
}
- private static Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(
- Iterable<Collection<User>> clusters) throws TasteException {
+ private Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(
+ Iterable<Collection<Comparable<?>>> clusters) throws TasteException {
Map<Comparable<?>, List<RecommendedItem>> recsPerUser = new FastMap<Comparable<?>, List<RecommendedItem>>();
- for (Collection<User> cluster : clusters) {
+ for (Collection<Comparable<?>> cluster : clusters) {
List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
- for (User user : cluster) {
- recsPerUser.put(user.getID(), recs);
+ for (Comparable<?> userID : cluster) {
+ recsPerUser.put(userID, recs);
}
}
return Collections.unmodifiableMap(recsPerUser);
}
- private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+ private List<RecommendedItem> computeTopRecsForCluster(Collection<Comparable<?>> cluster)
throws TasteException {
-
+ DataModel dataModel = getDataModel();
Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
- for (User user : cluster) {
- Preference[] prefs = user.getPreferencesAsArray();
- for (Preference pref : prefs) {
- allItemIDs.add(pref.getItemID());
+ for (Comparable<?> userID : cluster) {
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ allItemIDs.add(prefs.getItemID(i));
}
}
@@ -394,12 +390,13 @@
return Collections.unmodifiableList(topItems);
}
- private static Map<Comparable<?>, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
- Map<Comparable<?>, Collection<User>> clustersPerUser =
- new FastMap<Comparable<?>, Collection<User>>(clusters.size());
- for (Collection<User> cluster : clusters) {
- for (User user : cluster) {
- clustersPerUser.put(user.getID(), cluster);
+ private static Map<Comparable<?>, Collection<Comparable<?>>>
+ computeClustersPerUserID(Collection<Collection<Comparable<?>>> clusters) {
+ Map<Comparable<?>, Collection<Comparable<?>>> clustersPerUser =
+ new FastMap<Comparable<?>, Collection<Comparable<?>>>(clusters.size());
+ for (Collection<Comparable<?>> cluster : clusters) {
+ for (Comparable<?> userID : cluster) {
+ clustersPerUser.put(userID, cluster);
}
}
return clustersPerUser;
@@ -415,21 +412,22 @@
return "TreeClusteringRecommender[clusterSimilarity:" + clusterSimilarity + ']';
}
- private static class Estimator implements TopItems.Estimator<Comparable<?>> {
+ private class Estimator implements TopItems.Estimator<Comparable<?>> {
- private final Collection<User> cluster;
+ private final Collection<Comparable<?>> cluster;
- private Estimator(Collection<User> cluster) {
+ private Estimator(Collection<Comparable<?>> cluster) {
this.cluster = cluster;
}
@Override
- public double estimate(Comparable<?> itemID) {
+ public double estimate(Comparable<?> itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
RunningAverage average = new FullRunningAverage();
- for (User user : cluster) {
- Preference pref = user.getPreferenceFor(itemID);
+ for (Comparable<?> userID : cluster) {
+ Float pref = dataModel.getPreferenceValue(userID, itemID);
if (pref != null) {
- average.addDatum(pref.getValue());
+ average.addDatum(pref);
}
}
return average.getAverage();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Tue Aug 4 00:06:46 2009
@@ -26,8 +26,7 @@
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -46,19 +45,19 @@
import java.util.concurrent.locks.ReentrantLock;
/**
- * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link
- * org.apache.mahout.cf.taste.model.User}s, then determines the clusters' top recommendations. This implementation
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters users,
+ * then determines the clusters' top recommendations. This implementation
* builds clusters by repeatedly merging clusters until only a certain number remain, meaning that each cluster is sort
* of a tree of other clusters.</p>
*
* <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p> <ul>
- * <li>For all {@link org.apache.mahout.cf.taste.model.User}s in a cluster, recommendations will be the same</li>
+ * <li>For all users in a cluster, recommendations will be the same</li>
* <li>{@link #estimatePreference(Comparable, Comparable)} may well return {@link Double#NaN}; it does so
- * when asked to estimate preference for an item for which no preference is expressed in the {@link
- * org.apache.mahout.cf.taste.model.User}s in the cluster.</li> </ul>
+ * when asked to estimate preference for an item for which no preference is expressed in the
+ * users in the cluster.</li> </ul>
*
* <p>This is an <em>experimental</em> implementation which tries to gain a lot of speed at the cost of accuracy in
- * building clusters, compared to {@link org.apache.mahout.cf.taste.impl.recommender.TreeClusteringRecommender}. It will
+ * building clusters, compared to {@link TreeClusteringRecommender}. It will
* sometimes cluster two other clusters together that may not be the exact closest two clusters in existence. This may
* not affect the recommendation quality much, but it potentially speeds up the clustering process dramatically.</p>
*/
@@ -71,16 +70,15 @@
private final double clusteringThreshold;
private final boolean clusteringByThreshold;
private Map<Comparable<?>, List<RecommendedItem>> topRecsByUserID;
- private Collection<Collection<User>> allClusters;
- private Map<Comparable<?>, Collection<User>> clustersByUserID;
+ private Collection<Collection<Comparable<?>>> allClusters;
+ private Map<Comparable<?>, Collection<Comparable<?>>> clustersByUserID;
private boolean clustersBuilt;
private final ReentrantLock buildClustersLock;
private final RefreshHelper refreshHelper;
/**
- * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provdes {@link
- * org.apache.mahout.cf.taste.model.User}s
- * @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
+ * @param dataModel {@link DataModel} which provides users
+ * @param clusterSimilarity {@link ClusterSimilarity} used to compute
* cluster similarity
* @param numClusters desired number of clusters to create
* @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
@@ -112,8 +110,7 @@
}
/**
- * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provdes {@link
- * org.apache.mahout.cf.taste.model.User}s
+ * @param dataModel {@link org.apache.mahout.cf.taste.model.DataModel} which provides users
* @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
* cluster similarity
* @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
@@ -165,7 +162,7 @@
return Collections.emptyList();
}
- User theUser = getDataModel().getUser(userID);
+ DataModel dataModel = getDataModel();
List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
// Only add items the user doesn't already have a preference for.
// And that the rescorer doesn't "reject".
@@ -174,7 +171,7 @@
if (rescorer != null && rescorer.isFiltered(itemID)) {
continue;
}
- if (theUser.getPreferenceFor(itemID) == null &&
+ if (dataModel.getPreferenceValue(userID, itemID) == null &&
(rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
rescored.add(recommendedItem);
}
@@ -185,15 +182,14 @@
}
@Override
- public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+ public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
if (userID == null || itemID == null) {
throw new IllegalArgumentException("userID or itemID is null");
}
DataModel model = getDataModel();
- User theUser = model.getUser(userID);
- Preference actualPref = theUser.getPreferenceFor(itemID);
+ Float actualPref = model.getPreferenceValue(userID, itemID);
if (actualPref != null) {
- return actualPref.getValue();
+ return actualPref;
}
checkClustersBuilt();
List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
@@ -205,21 +201,21 @@
}
}
// Hmm, we have no idea. The item is not in the user's cluster
- return Double.NaN;
+ return Float.NaN;
}
@Override
- public Collection<User> getCluster(Comparable<?> userID) throws TasteException {
+ public Collection<Comparable<?>> getCluster(Comparable<?> userID) throws TasteException {
if (userID == null) {
throw new IllegalArgumentException("userID is null");
}
checkClustersBuilt();
- Collection<User> cluster = clustersByUserID.get(userID);
- return cluster == null ? Collections.<User>emptyList() : cluster;
+ Collection<Comparable<?>> cluster = clustersByUserID.get(userID);
+ return cluster == null ? Collections.<Comparable<?>>emptyList() : cluster;
}
@Override
- public Collection<Collection<User>> getClusters() throws TasteException {
+ public Collection<Collection<Comparable<?>>> getClusters() throws TasteException {
checkClustersBuilt();
return allClusters;
}
@@ -232,23 +228,23 @@
private static final class ClusterClusterPair implements Comparable<ClusterClusterPair> {
- private final Collection<User> cluster1;
- private final Collection<User> cluster2;
+ private final Collection<Comparable<?>> cluster1;
+ private final Collection<Comparable<?>> cluster2;
private final double similarity;
- private ClusterClusterPair(Collection<User> cluster1,
- Collection<User> cluster2,
+ private ClusterClusterPair(Collection<Comparable<?>> cluster1,
+ Collection<Comparable<?>> cluster2,
double similarity) {
this.cluster1 = cluster1;
this.cluster2 = cluster2;
this.similarity = similarity;
}
- private Collection<User> getCluster1() {
+ private Collection<Comparable<?>> getCluster1() {
return cluster1;
}
- private Collection<User> getCluster2() {
+ private Collection<Comparable<?>> getCluster2() {
return cluster2;
}
@@ -299,11 +295,11 @@
} else {
- List<Collection<User>> clusters = new LinkedList<Collection<User>>();
+ List<Collection<Comparable<?>>> clusters = new LinkedList<Collection<Comparable<?>>>();
// Begin with a cluster for each user:
- for (User user : model.getUsers()) {
- Collection<User> newCluster = new FastSet<User>();
- newCluster.add(user);
+ for (Comparable<?> userID : model.getUserIDs()) {
+ Collection<Comparable<?>> newCluster = new FastSet<Comparable<?>>();
+ newCluster.add(userID);
clusters.add(newCluster);
}
@@ -324,7 +320,7 @@
}
}
- private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done)
+ private boolean mergeClosestClusters(int numUsers, List<Collection<Comparable<?>>> clusters, boolean done)
throws TasteException {
// We find a certain number of closest clusters...
LinkedList<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);
@@ -348,15 +344,15 @@
break;
}
- Collection<User> cluster1 = top.getCluster1();
- Collection<User> cluster2 = top.getCluster2();
+ Collection<Comparable<?>> cluster1 = top.getCluster1();
+ Collection<Comparable<?>> cluster2 = top.getCluster2();
// Pull out current two clusters from clusters
- Iterator<Collection<User>> clusterIterator = clusters.iterator();
+ Iterator<Collection<Comparable<?>>> clusterIterator = clusters.iterator();
boolean removed1 = false;
boolean removed2 = false;
while (clusterIterator.hasNext() && !(removed1 && removed2)) {
- Collection<User> current = clusterIterator.next();
+ Collection<Comparable<?>> current = clusterIterator.next();
// Yes, use == here
if (!removed1 && cluster1 == current) {
clusterIterator.remove();
@@ -372,22 +368,22 @@
for (Iterator<ClusterClusterPair> queueIterator = queue.iterator();
queueIterator.hasNext();) {
ClusterClusterPair pair = queueIterator.next();
- Collection<User> pair1 = pair.getCluster1();
- Collection<User> pair2 = pair.getCluster2();
+ Collection<Comparable<?>> pair1 = pair.getCluster1();
+ Collection<Comparable<?>> pair2 = pair.getCluster2();
if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2) {
queueIterator.remove();
}
}
// Make new merged cluster
- Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+ Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
merged.addAll(cluster1);
merged.addAll(cluster2);
// Compare against other clusters; update queue if needed
// That new pair we're just adding might be pretty close to something else, so
// catch that case here and put it back into our queue
- for (Collection<User> cluster : clusters) {
+ for (Collection<Comparable<?>> cluster : clusters) {
double similarity = clusterSimilarity.getSimilarity(merged, cluster);
if (similarity > queue.getLast().getSimilarity()) {
ListIterator<ClusterClusterPair> queueIterator = queue.listIterator();
@@ -408,16 +404,16 @@
return done;
}
- private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<User>> clusters)
+ private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<Comparable<?>>> clusters)
throws TasteException {
boolean full = false;
LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
int i = 0;
- for (Collection<User> cluster1 : clusters) {
+ for (Collection<Comparable<?>> cluster1 : clusters) {
i++;
- ListIterator<Collection<User>> it2 = clusters.listIterator(i);
+ ListIterator<Collection<Comparable<?>>> it2 = clusters.listIterator(i);
while (it2.hasNext()) {
- Collection<User> cluster2 = it2.next();
+ Collection<Comparable<?>> cluster2 = it2.next();
double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
if (!Double.isNaN(similarity) &&
(!full || similarity > queue.getLast().getSimilarity())) {
@@ -442,26 +438,28 @@
return queue;
}
- private static Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)
- throws TasteException {
+ private Map<Comparable<?>, List<RecommendedItem>>
+ computeTopRecsPerUserID(Iterable<Collection<Comparable<?>>> clusters) throws TasteException {
Map<Comparable<?>, List<RecommendedItem>> recsPerUser = new FastMap<Comparable<?>, List<RecommendedItem>>();
- for (Collection<User> cluster : clusters) {
+ for (Collection<Comparable<?>> cluster : clusters) {
List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
- for (User user : cluster) {
- recsPerUser.put(user.getID(), recs);
+ for (Comparable<?> userID : cluster) {
+ recsPerUser.put(userID, recs);
}
}
return Collections.unmodifiableMap(recsPerUser);
}
- private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+ private List<RecommendedItem> computeTopRecsForCluster(Collection<Comparable<?>> cluster)
throws TasteException {
+ DataModel dataModel = getDataModel();
Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
- for (User user : cluster) {
- Preference[] prefs = user.getPreferencesAsArray();
- for (Preference pref : prefs) {
- allItemIDs.add(pref.getItemID());
+ for (Comparable<?> userID : cluster) {
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ allItemIDs.add(prefs.getItemID(i));
}
}
@@ -474,11 +472,13 @@
return Collections.unmodifiableList(topItems);
}
- private static Map<Comparable<?>, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
- Map<Comparable<?>, Collection<User>> clustersPerUser = new FastMap<Comparable<?>, Collection<User>>(clusters.size());
- for (Collection<User> cluster : clusters) {
- for (User user : cluster) {
- clustersPerUser.put(user.getID(), cluster);
+ private static Map<Comparable<?>, Collection<Comparable<?>>>
+ computeClustersPerUserID(Collection<Collection<Comparable<?>>> clusters) {
+ Map<Comparable<?>, Collection<Comparable<?>>> clustersPerUser =
+ new FastMap<Comparable<?>, Collection<Comparable<?>>>(clusters.size());
+ for (Collection<Comparable<?>> cluster : clusters) {
+ for (Comparable<?> userID : cluster) {
+ clustersPerUser.put(userID, cluster);
}
}
return clustersPerUser;
@@ -494,21 +494,22 @@
return "TreeClusteringRecommender2[clusterSimilarity:" + clusterSimilarity + ']';
}
- private static class Estimator implements TopItems.Estimator<Comparable<?>> {
+ private class Estimator implements TopItems.Estimator<Comparable<?>> {
- private final Collection<User> cluster;
+ private final Collection<Comparable<?>> cluster;
- private Estimator(Collection<User> cluster) {
+ private Estimator(Collection<Comparable<?>> cluster) {
this.cluster = cluster;
}
@Override
- public double estimate(Comparable<?> itemID) {
+ public double estimate(Comparable<?> itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
RunningAverage average = new FullRunningAverage();
- for (User user : cluster) {
- Preference pref = user.getPreferenceFor(itemID);
+ for (Comparable<?> userID : cluster) {
+ Float pref = dataModel.getPreferenceValue(userID, itemID);
if (pref != null) {
- average.addDatum(pref.getValue());
+ average.addDatum(pref);
}
}
return average.getAverage();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java Tue Aug 4 00:06:46 2009
@@ -23,8 +23,7 @@
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
@@ -61,27 +60,31 @@
}
- private double[] getInterpolations(Comparable<?> itemID, User theUser, List<Comparable<?>> itemNeighborhood) throws TasteException {
+ private double[] getInterpolations(Comparable<?> itemID, Comparable<?> userID, List<Comparable<?>> itemNeighborhood)
+ throws TasteException {
int k = itemNeighborhood.size();
double[][] A = new double[k][k];
double[] b = new double[k];
int i = 0;
+ DataModel dataModel = getDataModel();
+
int numUsers = getDataModel().getNumUsers();
for (Comparable<?> iitem : itemNeighborhood) {
- Preference[] iPrefs = getDataModel().getPreferencesForItemAsArray(iitem);
+ PreferenceArray iPrefs = getDataModel().getPreferencesForItem(iitem);
+ int iSize = iPrefs.length();
int j = 0;
for (Comparable<?> jitem : itemNeighborhood) {
double value = 0.0;
- for (Preference pi : iPrefs) {
- User v = pi.getUser();
- if (v.equals(theUser)) {
+ for (int pi = 0; pi < iSize; pi++) {
+ Comparable<?> v = iPrefs.getUserID(pi);
+ if (v.equals(userID)) {
continue;
}
- Preference pj = v.getPreferenceFor(jitem);
+ Float pj = dataModel.getPreferenceValue(userID, jitem);
if (pj != null) {
- value += pi.getValue() * pj.getValue();
+ value += iPrefs.getValue(pi) * pj;
}
}
A[i][j] = value / numUsers;
@@ -90,18 +93,19 @@
i++;
}
- Preference[] iPrefs = getDataModel().getPreferencesForItemAsArray(itemID);
+ PreferenceArray iPrefs = getDataModel().getPreferencesForItem(itemID);
+ int iSize = iPrefs.length();
i = 0;
for (Comparable<?> jitem : itemNeighborhood) {
double value = 0.0;
- for (Preference pi : iPrefs) {
- User v = pi.getUser();
- if (v.equals(theUser)) {
+ for (int pi = 0; pi < iSize; pi++) {
+ Comparable<?> v = iPrefs.getUserID(pi);
+ if (v.equals(userID)) {
continue;
}
- Preference pj = v.getPreferenceFor(jitem);
+ Float pj = dataModel.getPreferenceValue(userID, jitem);
if (pj != null) {
- value += pi.getValue() * pj.getValue();
+ value += iPrefs.getValue(pi) * pj;
}
}
b[i] = value / numUsers;
@@ -112,11 +116,14 @@
}
@Override
- protected double doEstimatePreference(User theUser, Comparable<?> itemID) throws TasteException {
+ protected float doEstimatePreference(Comparable<?> theUserID, Comparable<?> itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
- for (Preference pref : theUser.getPreferencesAsArray()) {
- allItemIDs.add(pref.getItemID());
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(theUserID);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ allItemIDs.add(prefs.getItemID(i));
}
allItemIDs.remove(itemID);
@@ -127,23 +134,23 @@
}
- double[] weights = getInterpolations(itemID, theUser, theNeighborhood);
+ double[] weights = getInterpolations(itemID, theUserID, theNeighborhood);
int i = 0;
double preference = 0.0;
double totalSimilarity = 0.0;
for (Comparable<?> jitem : theNeighborhood) {
- Preference pref = theUser.getPreferenceFor(jitem);
+ Float pref = dataModel.getPreferenceValue(theUserID, jitem);
if (pref != null) {
- preference += pref.getValue() * weights[i];
+ preference += pref * weights[i];
totalSimilarity += weights[i];
}
i++;
}
- return totalSimilarity == 0.0 ? Double.NaN : preference / totalSimilarity;
+ return totalSimilarity == 0.0 ? Float.NaN : (float) (preference / totalSimilarity);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java Tue Aug 4 00:06:46 2009
@@ -30,8 +30,7 @@
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -144,14 +143,14 @@
}
@Override
- public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, Preference[] prefs)
+ public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, PreferenceArray prefs)
throws TasteException {
try {
buildAverageDiffsLock.readLock().lock();
- int size = prefs.length;
+ int size = prefs.length();
RunningAverage[] result = new RunningAverage[size];
for (int i = 0; i < size; i++) {
- result[i] = getDiff(prefs[i].getItemID(), itemID);
+ result[i] = getDiff(prefs.getItemID(i), itemID);
}
return result;
} finally {
@@ -165,7 +164,7 @@
}
@Override
- public void updateItemPref(Comparable<?> itemID, double prefDelta, boolean remove) {
+ public void updateItemPref(Comparable<?> itemID, float prefDelta, boolean remove) {
if (!remove && stdDevWeighted) {
throw new UnsupportedOperationException("Can't update only when stdDevWeighted is set");
}
@@ -201,11 +200,10 @@
@Override
public Set<Comparable<?>> getRecommendableItemIDs(Comparable<?> userID) throws TasteException {
- User user = dataModel.getUser(userID);
Set<Comparable<?>> result = allRecommendableItemIDs.clone();
Iterator<Comparable<?>> it = result.iterator();
while (it.hasNext()) {
- if (user.getPreferenceFor(it.next()) != null) {
+ if (dataModel.getPreferenceValue(userID, it.next()) != null) {
it.remove();
}
}
@@ -218,8 +216,8 @@
buildAverageDiffsLock.writeLock().lock();
averageDiffs.clear();
long averageCount = 0L;
- for (User user : dataModel.getUsers()) {
- averageCount = processOneUser(averageCount, user);
+ for (Comparable<?> userID : dataModel.getUserIDs()) {
+ averageCount = processOneUser(averageCount, userID);
}
pruneInconsequentialDiffs();
@@ -263,15 +261,14 @@
allRecommendableItemIDs.rehash();
}
- private long processOneUser(long averageCount, User user) {
- log.debug("Processing prefs for user {}", user);
+ private long processOneUser(long averageCount, Comparable<?> userID) throws TasteException {
+ log.debug("Processing prefs for user {}", userID);
// Save off prefs for the life of this loop iteration
- Preference[] userPreferences = user.getPreferencesAsArray();
- int length = userPreferences.length;
+ PreferenceArray userPreferences = dataModel.getPreferencesFromUser(userID);
+ int length = userPreferences.length();
for (int i = 0; i < length; i++) {
- Preference prefA = userPreferences[i];
- double prefAValue = prefA.getValue();
- Comparable<?> itemIDA = prefA.getItemID();
+ double prefAValue = userPreferences.getValue(i);
+ Comparable<?> itemIDA = userPreferences.getItemID(i);
FastMap<Comparable<?>, RunningAverage> aMap = averageDiffs.get(itemIDA);
if (aMap == null) {
aMap = new FastMap<Comparable<?>, RunningAverage>();
@@ -279,8 +276,7 @@
}
for (int j = i + 1; j < length; j++) {
// This is a performance-critical block
- Preference prefB = userPreferences[j];
- Comparable<?> itemIDB = prefB.getItemID();
+ Comparable<?> itemIDB = userPreferences.getItemID(j);
RunningAverage average = aMap.get(itemIDB);
if (average == null && averageCount < maxEntries) {
average = buildRunningAverage();
@@ -288,7 +284,7 @@
averageCount++;
}
if (average != null) {
- average.addDatum(prefB.getValue() - prefAValue);
+ average.addDatum(userPreferences.getValue(j) - prefAValue);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java Tue Aug 4 00:06:46 2009
@@ -27,8 +27,7 @@
import org.apache.mahout.cf.taste.impl.recommender.AbstractRecommender;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
@@ -107,10 +106,9 @@
log.debug("Recommending items for user ID '{}'", userID);
- User theUser = getDataModel().getUser(userID);
Set<Comparable<?>> allItemIDs = diffStorage.getRecommendableItemIDs(userID);
- TopItems.Estimator<Comparable<?>> estimator = new Estimator(theUser);
+ TopItems.Estimator<Comparable<?>> estimator = new Estimator(userID);
List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItemIDs, rescorer, estimator);
@@ -119,25 +117,24 @@
}
@Override
- public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+ public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
DataModel model = getDataModel();
- User theUser = model.getUser(userID);
- Preference actualPref = theUser.getPreferenceFor(itemID);
+ Float actualPref = model.getPreferenceValue(userID, itemID);
if (actualPref != null) {
- return actualPref.getValue();
+ return actualPref;
}
- return doEstimatePreference(theUser, itemID);
+ return doEstimatePreference(userID, itemID);
}
- private double doEstimatePreference(User theUser, Comparable<?> itemID) throws TasteException {
+ private float doEstimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
double count = 0.0;
double totalPreference = 0.0;
- Preference[] prefs = theUser.getPreferencesAsArray();
- RunningAverage[] averages = diffStorage.getDiffs(theUser.getID(), itemID, prefs);
- for (int i = 0; i < prefs.length; i++) {
+ PreferenceArray prefs = getDataModel().getPreferencesFromUser(userID);
+ RunningAverage[] averages = diffStorage.getDiffs(userID, itemID, prefs);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
RunningAverage averageDiff = averages[i];
if (averageDiff != null) {
- Preference pref = prefs[i];
double averageDiffValue = averageDiff.getAverage();
if (weighted) {
double weight = (double) averageDiff.getCount();
@@ -152,30 +149,29 @@
// and disqualify this pref entirely
// (Thanks Daemmon)
}
- totalPreference += weight * (pref.getValue() + averageDiffValue);
+ totalPreference += weight * (prefs.getValue(i) + averageDiffValue);
count += weight;
} else {
- totalPreference += pref.getValue() + averageDiffValue;
+ totalPreference += prefs.getValue(i) + averageDiffValue;
count += 1.0;
}
}
}
if (count <= 0.0) {
RunningAverage itemAverage = diffStorage.getAverageItemPref(itemID);
- return itemAverage == null ? Double.NaN : itemAverage.getAverage();
+ return itemAverage == null ? Float.NaN : (float) itemAverage.getAverage();
} else {
- return totalPreference / count;
+ return (float) (totalPreference / count);
}
}
@Override
- public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value) throws TasteException {
+ public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value) throws TasteException {
DataModel dataModel = getDataModel();
- double prefDelta;
+ float prefDelta;
try {
- User theUser = dataModel.getUser(userID);
- Preference oldPref = theUser.getPreferenceFor(itemID);
- prefDelta = oldPref == null ? value : value - oldPref.getValue();
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+ prefDelta = oldPref == null ? value : value - oldPref;
} catch (NoSuchUserException nsee) {
prefDelta = value;
}
@@ -186,11 +182,10 @@
@Override
public void removePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
DataModel dataModel = getDataModel();
- User theUser = dataModel.getUser(userID);
- Preference oldPref = theUser.getPreferenceFor(itemID);
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
super.removePreference(userID, itemID);
if (oldPref != null) {
- diffStorage.updateItemPref(itemID, oldPref.getValue(), true);
+ diffStorage.updateItemPref(itemID, oldPref, true);
}
}
@@ -208,15 +203,15 @@
private final class Estimator implements TopItems.Estimator<Comparable<?>> {
- private final User theUser;
+ private final Comparable<?> userID;
- private Estimator(User theUser) {
- this.theUser = theUser;
+ private Estimator(Comparable<?> userID) {
+ this.userID = userID;
}
@Override
public double estimate(Comparable<?> itemID) throws TasteException {
- return doEstimatePreference(theUser, itemID);
+ return doEstimatePreference(userID, itemID);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java Tue Aug 4 00:06:46 2009
@@ -25,7 +25,7 @@
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.impl.common.jdbc.AbstractJDBCComponent;
import org.apache.mahout.cf.taste.model.JDBCDataModel;
-import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -148,9 +148,9 @@
}
@Override
- public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, Preference[] prefs)
+ public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, PreferenceArray prefs)
throws TasteException {
- int size = prefs.length;
+ int size = prefs.length();
RunningAverage[] result = new RunningAverage[size];
Connection conn = null;
PreparedStatement stmt = null;
@@ -170,7 +170,7 @@
int i = 0;
while (rs.next()) {
Comparable<?> nextResultItemID = (Comparable<?>) rs.getObject(3);
- while (!prefs[i].getItemID().equals(nextResultItemID)) {
+ while (!prefs.getItemID(i).equals(nextResultItemID)) {
i++;
// result[i] is null for these values of i
}
@@ -215,7 +215,7 @@
}
@Override
- public void updateItemPref(Comparable<?> itemID, double prefDelta, boolean remove)
+ public void updateItemPref(Comparable<?> itemID, float prefDelta, boolean remove)
throws TasteException {
Connection conn = null;
try {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Tue Aug 4 00:06:46 2009
@@ -31,7 +31,7 @@
* <tr><td>234</td><td>789</td><td>2.1</td><td>1</td></tr> </table>
*
* <p><code>item_id_a</code> and <code>item_id_b</code> may have types compatible with Java String, or integer
- * primitive types. <code>average_diff</code> must be compatible with <code>double</code> and
+ * primitive types. <code>average_diff</code> must be compatible with <code>float</code> and
* <code>count</code> must be compatible with <code>int</code>.</p>
*
* <p>The following command sets up a suitable table in MySQL:</p>
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Tue Aug 4 00:06:46 2009
@@ -30,7 +30,6 @@
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -84,8 +83,8 @@
userMap = new FastMap<Comparable<?>, Integer>(numUsers);
int idx = 0;
- for (User user : dataModel.getUsers()) {
- userMap.put(user.getID(), idx++);
+ for (Comparable<?> userID : dataModel.getUserIDs()) {
+ userMap.put(userID, idx++);
}
int numItems = dataModel.getNumItems();
@@ -117,8 +116,9 @@
private void recachePreferences() throws TasteException {
cachedPreferences.clear();
- for (User user : getDataModel().getUsers()) {
- for (Preference pref : user.getPreferences()) {
+ DataModel dataModel = getDataModel();
+ for (Comparable<?> userID : dataModel.getUserIDs()) {
+ for (Preference pref : dataModel.getPreferencesFromUser(userID)) {
cachedPreferences.add(pref);
}
}
@@ -126,8 +126,9 @@
private double getAveragePreference() throws TasteException {
RunningAverage average = new FullRunningAverage();
- for (User user : getDataModel().getUsers()) {
- for (Preference pref : user.getPreferences()) {
+ DataModel dataModel = getDataModel();
+ for (Comparable<?> userID : dataModel.getUserIDs()) {
+ for (Preference pref : dataModel.getPreferencesFromUser(userID)) {
average.addDatum(pref.getValue());
}
}
@@ -144,20 +145,20 @@
Collections.shuffle(cachedPreferences, random);
for (int i = 0; i < numFeatures; i++) {
for (Preference pref : cachedPreferences) {
- int useridx = userMap.get(pref.getUser().getID());
+ int useridx = userMap.get(pref.getUserID());
int itemidx = itemMap.get(pref.getItemID());
emSvd.train(useridx, itemidx, i, pref.getValue());
}
}
}
- private double predictRating(int user, int item) {
- return emSvd.getDotProduct(user, item);
+ private float predictRating(int user, int item) {
+ return (float) emSvd.getDotProduct(user, item);
}
@Override
- public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+ public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
Integer useridx = userMap.get(userID);
if (useridx == null) {
throw new NoSuchUserException();
@@ -182,11 +183,9 @@
log.debug("Recommending items for user ID '{}'", userID);
- User theUser = getDataModel().getUser(userID);
+ Set<Comparable<?>> allItemIDs = getAllOtherItems(userID);
- Set<Comparable<?>> allItemIDs = getAllOtherItems(theUser);
-
- TopItems.Estimator<Comparable<?>> estimator = new Estimator(theUser);
+ TopItems.Estimator<Comparable<?>> estimator = new Estimator(userID);
List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItemIDs, rescorer, estimator);
@@ -206,15 +205,15 @@
private final class Estimator implements TopItems.Estimator<Comparable<?>> {
- private final User theUser;
+ private final Comparable<?> theUserID;
- private Estimator(User theUser) {
- this.theUser = theUser;
+ private Estimator(Comparable<?> theUserID) {
+ this.theUserID = theUserID;
}
@Override
public double estimate(Comparable<?> itemID) throws TasteException {
- return estimatePreference(theUser.getID(), itemID);
+ return estimatePreference(theUserID, itemID);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Tue Aug 4 00:06:46 2009
@@ -23,7 +23,7 @@
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -127,30 +127,32 @@
* @param sumY2 sum of the square of the user/item preference values, over the second item/user
* @param sumXYdiff2 sum of squares of differences in X and Y values
* @return similarity value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no similarity can be computed
- * (e.g. when no items have been rated by both {@link User}s
+ * (e.g. when no items have been rated by both users)
*/
abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
@Override
- public double userSimilarity(User user1, User user2) throws TasteException {
+ public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
- if (user1 == null || user2 == null) {
- throw new IllegalArgumentException("user1 or user2 is null");
+ if (userID1 == null || userID2 == null) {
+ throw new IllegalArgumentException("userID1 or userID2 is null");
}
- Preference[] xPrefs = user1.getPreferencesAsArray();
- Preference[] yPrefs = user2.getPreferencesAsArray();
+ PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1);
+ PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2);
+ int xLength = xPrefs.length();
+ int yLength = yPrefs.length();
- if (xPrefs.length == 0 || yPrefs.length == 0) {
+ if (xLength == 0 || yLength == 0) {
return Double.NaN;
}
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
+ Preference xPref = xPrefs.get(0);
+ Preference yPref = yPrefs.get(0);
Comparable<?> xIndex = xPref.getItemID();
Comparable<?> yIndex = yPref.getItemID();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
+ int xPrefIndex = 0;
+ int yPrefIndex = 0;
double sumX = 0.0;
double sumX2 = 0.0;
@@ -183,11 +185,11 @@
if (compare < 0) {
// X has a value; infer Y's
x = hasPrefTransform ? prefTransform.getTransformedValue(xPref) : xPref.getValue();
- y = inferrer.inferPreference(user2, xIndex);
+ y = inferrer.inferPreference(userID2, xIndex);
} else {
// compare > 0
// Y has a value; infer X's
- x = inferrer.inferPreference(user1, yIndex);
+ x = inferrer.inferPreference(userID1, yIndex);
y = hasPrefTransform ? prefTransform.getTransformedValue(yPref) : yPref.getValue();
}
}
@@ -201,17 +203,17 @@
count++;
}
if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
+ if (++xPrefIndex >= xLength) {
break;
}
- xPref = xPrefs[xPrefIndex++];
+ xPref = xPrefs.get(xPrefIndex);
xIndex = xPref.getItemID();
}
if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
+ if (++yPrefIndex >= yLength) {
break;
}
- yPref = yPrefs[yPrefIndex++];
+ yPref = yPrefs.get(yPrefIndex);
yIndex = yPref.getItemID();
}
}
@@ -230,7 +232,7 @@
double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
if (similarityTransform != null) {
- result = similarityTransform.transformSimilarity(user1, user2, result);
+ result = similarityTransform.transformSimilarity(userID1, userID2, result);
}
if (!Double.isNaN(result)) {
@@ -246,17 +248,19 @@
throw new IllegalArgumentException("item1 or item2 is null");
}
- Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(itemID1);
- Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(itemID2);
+ PreferenceArray xPrefs = dataModel.getPreferencesForItem(itemID1);
+ PreferenceArray yPrefs = dataModel.getPreferencesForItem(itemID2);
+ int xLength = xPrefs.length();
+ int yLength = yPrefs.length();
- if (xPrefs.length == 0 || yPrefs.length == 0) {
+ if (xLength == 0 || yLength == 0) {
return Double.NaN;
}
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- User xIndex = xPref.getUser();
- User yIndex = yPref.getUser();
+ Preference xPref = xPrefs.get(0);
+ Preference yPref = yPrefs.get(0);
+ Comparable<?> xIndex = xPref.getUserID();
+ Comparable<?> yIndex = yPref.getUserID();
int xPrefIndex = 1;
int yPrefIndex = 1;
@@ -271,7 +275,7 @@
// No, pref inferrers and transforms don't apply here. I think.
while (true) {
- int compare = xIndex.compareTo(yIndex);
+ int compare = ((Comparable<Object>) xIndex).compareTo(yIndex);
if (compare == 0) {
// Both users expressed a preference for the item
double x = xPref.getValue();
@@ -286,18 +290,18 @@
count++;
}
if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
+ if (xPrefIndex == xLength) {
break;
}
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getUser();
+ xPref = xPrefs.get(xPrefIndex++);
+ xIndex = xPref.getUserID();
}
if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
+ if (yPrefIndex == yLength) {
break;
}
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getUser();
+ yPref = yPrefs.get(yPrefIndex++);
+ yIndex = yPref.getUserID();
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java Tue Aug 4 00:06:46 2009
@@ -24,34 +24,36 @@
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import java.util.Collection;
/**
- * <p>Implementations of this interface compute an inferred preference for a {@link User} and an item that the
+ * <p>Implementations of this interface compute an inferred preference for a user and an item that the
* user has not expressed any preference for. This might be an average of other preferences scores from that user, for
* example. This technique is sometimes called "default voting".</p>
*/
public final class AveragingPreferenceInferrer implements PreferenceInferrer {
- private static final Retriever<User, Double> RETRIEVER = new PrefRetriever();
+ private static final Float ZERO = 0.0f;
- private final Cache<User, Double> averagePreferenceValue;
+ private final DataModel dataModel;
+ private final Cache<Comparable<?>, Float> averagePreferenceValue;
public AveragingPreferenceInferrer(DataModel dataModel) throws TasteException {
- averagePreferenceValue = new Cache<User, Double>(RETRIEVER, dataModel.getNumUsers());
+ this.dataModel = dataModel;
+ Retriever<Comparable<?>, Float> retriever = new PrefRetriever();
+ averagePreferenceValue = new Cache<Comparable<?>, Float>(retriever, dataModel.getNumUsers());
refresh(null);
}
@Override
- public double inferPreference(User user, Comparable<?> itemID) throws TasteException {
- if (user == null || itemID == null) {
- throw new IllegalArgumentException("user or item is null");
+ public float inferPreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+ if (userID == null || itemID == null) {
+ throw new IllegalArgumentException("userID or item is null");
}
- return averagePreferenceValue.get(user);
+ return averagePreferenceValue.get(userID);
}
@Override
@@ -59,20 +61,20 @@
averagePreferenceValue.clear();
}
- private static final class PrefRetriever implements Retriever<User, Double> {
- private static final Double ZERO = 0.0;
+ private final class PrefRetriever implements Retriever<Comparable<?>, Float> {
@Override
- public Double get(User key) {
+ public Float get(Comparable<?> key) throws TasteException {
RunningAverage average = new FullRunningAverage();
- Preference[] prefs = key.getPreferencesAsArray();
- if (prefs.length == 0) {
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(key);
+ int size = prefs.length();
+ if (size == 0) {
return ZERO;
}
- for (Preference pref : prefs) {
- average.addDatum(pref.getValue());
+ for (int i = 0; i < size; i++) {
+ average.addDatum(prefs.getValue(i));
}
- return average.getAverage();
+ return (float) average.getAverage();
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java Tue Aug 4 00:06:46 2009
@@ -24,7 +24,6 @@
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -34,7 +33,7 @@
public final class CachingUserSimilarity implements UserSimilarity {
private final UserSimilarity similarity;
- private final Cache<Pair<User, User>, Double> similarityCache;
+ private final Cache<Pair<Comparable<?>, Comparable<?>>, Double> similarityCache;
public CachingUserSimilarity(UserSimilarity similarity, DataModel dataModel) throws TasteException {
if (similarity == null) {
@@ -42,14 +41,14 @@
}
this.similarity = similarity;
int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing
- this.similarityCache = new Cache<Pair<User, User>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
+ this.similarityCache = new Cache<Pair<Comparable<?>, Comparable<?>>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
}
@Override
- public double userSimilarity(User user1, User user2) throws TasteException {
- Pair<User, User> key = user1.compareTo(user2) < 0 ?
- new Pair<User, User>(user1, user2) :
- new Pair<User, User>(user2, user1);
+ public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
+ Pair<Comparable<?>, Comparable<?>> key = ((Comparable<Object>) userID1).compareTo(userID2) < 0 ?
+ new Pair<Comparable<?>, Comparable<?>>(userID1, userID2) :
+ new Pair<Comparable<?>, Comparable<?>>(userID2, userID1);
return similarityCache.get(key);
}
@@ -66,7 +65,7 @@
RefreshHelper.maybeRefresh(alreadyRefreshed, similarity);
}
- private static final class SimilarityRetriever implements Retriever<Pair<User, User>, Double> {
+ private static final class SimilarityRetriever implements Retriever<Pair<Comparable<?>, Comparable<?>>, Double> {
private final UserSimilarity similarity;
private SimilarityRetriever(UserSimilarity similarity) {
@@ -74,7 +73,7 @@
}
@Override
- public Double get(Pair<User, User> key) throws TasteException {
+ public Double get(Pair<Comparable<?>, Comparable<?>> key) throws TasteException {
return similarity.userSimilarity(key.getFirst(), key.getSecond());
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Tue Aug 4 00:06:46 2009
@@ -22,8 +22,8 @@
import org.apache.mahout.cf.taste.model.DataModel;
/**
- * <p>An implementation of a "similarity" based on the Euclidean "distance" between two {@link
- * org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions and preferences as points along
+ * <p>An implementation of a "similarity" based on the Euclidean "distance" between two
+ * users X and Y. Thinking of items as dimensions and preferences as points along
* those dimensions, a distance is computed using all items (dimensions) where both users have expressed a preference
* for that item. This is simply the square root of the sum of the squares of differences in position (preference) along
* each dimension. The similarity is then computed as 1 / (1 + distance), so the resulting values are in the range
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Tue Aug 4 00:06:46 2009
@@ -25,7 +25,6 @@
import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -37,7 +36,8 @@
public final class GenericUserSimilarity implements UserSimilarity {
- private final Map<User, Map<User, Double>> similarityMaps = new FastMap<User, Map<User, Double>>();
+ private final Map<Comparable<?>, Map<Comparable<?>, Double>> similarityMaps =
+ new FastMap<Comparable<?>, Map<Comparable<?>, Double>>();
public GenericUserSimilarity(Iterable<UserUserSimilarity> similarities) {
initSimilarityMaps(similarities);
@@ -49,15 +49,15 @@
}
public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
- List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
- Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+ List<? extends Comparable<?>> userIDs = IteratorUtils.iterableToList(dataModel.getUserIDs());
+ Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
initSimilarityMaps(new IteratorIterable<UserUserSimilarity>(it));
}
public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
throws TasteException {
- List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
- Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+ List<? extends Comparable<?>> userIDs = IteratorUtils.iterableToList(dataModel.getUserIDs());
+ Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
Iterable<UserUserSimilarity> keptSimilarities =
TopItems.getTopUserUserSimilarities(maxToKeep, new IteratorIterable<UserUserSimilarity>(it));
initSimilarityMaps(keptSimilarities);
@@ -65,13 +65,13 @@
private void initSimilarityMaps(Iterable<UserUserSimilarity> similarities) {
for (UserUserSimilarity uuc : similarities) {
- User similarityUser1 = uuc.getUser1();
- User similarityUser2 = uuc.getUser2();
- int compare = similarityUser1.compareTo(similarityUser2);
+ Comparable<?> similarityUser1 = uuc.getUserID1();
+ Comparable<?> similarityUser2 = uuc.getUserID2();
+ int compare = ((Comparable<Object>) similarityUser1).compareTo(similarityUser2);
if (compare != 0) {
// Order them -- first key should be the "smaller" one
- User user1;
- User user2;
+ Comparable<?> user1;
+ Comparable<?> user2;
if (compare < 0) {
user1 = similarityUser1;
user2 = similarityUser2;
@@ -79,9 +79,9 @@
user1 = similarityUser2;
user2 = similarityUser1;
}
- Map<User, Double> map = similarityMaps.get(user1);
+ Map<Comparable<?>, Double> map = similarityMaps.get(user1);
if (map == null) {
- map = new FastMap<User, Double>();
+ map = new FastMap<Comparable<?>, Double>();
similarityMaps.put(user1, map);
}
map.put(user2, uuc.getValue());
@@ -91,21 +91,21 @@
}
@Override
- public double userSimilarity(User user1, User user2) {
- int compare = user1.compareTo(user2);
+ public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) {
+ int compare = ((Comparable<Object>) userID1).compareTo(userID2);
if (compare == 0) {
return 1.0;
}
- User first;
- User second;
+ Comparable<?> first;
+ Comparable<?> second;
if (compare < 0) {
- first = user1;
- second = user2;
+ first = userID1;
+ second = userID2;
} else {
- first = user2;
- second = user1;
+ first = userID2;
+ second = userID1;
}
- Map<User, Double> nextMap = similarityMaps.get(first);
+ Map<Comparable<?>, Double> nextMap = similarityMaps.get(first);
if (nextMap == null) {
return Double.NaN;
}
@@ -125,28 +125,28 @@
public static final class UserUserSimilarity implements Comparable<UserUserSimilarity> {
- private final User user1;
- private final User user2;
+ private final Comparable<?> userID1;
+ private final Comparable<?> userID2;
private final double value;
- public UserUserSimilarity(User user1, User user2, double value) {
- if (user1 == null || user2 == null) {
+ public UserUserSimilarity(Comparable<?> userID1, Comparable<?> userID2, double value) {
+ if (userID1 == null || userID2 == null) {
throw new IllegalArgumentException("A user is null");
}
if (Double.isNaN(value) || value < -1.0 || value > 1.0) {
throw new IllegalArgumentException("Illegal value: " + value);
}
- this.user1 = user1;
- this.user2 = user2;
+ this.userID1 = userID1;
+ this.userID2 = userID2;
this.value = value;
}
- public User getUser1() {
- return user1;
+ public Comparable<?> getUserID1() {
+ return userID1;
}
- public User getUser2() {
- return user2;
+ public Comparable<?> getUserID2() {
+ return userID2;
}
public double getValue() {
@@ -155,7 +155,7 @@
@Override
public String toString() {
- return "UserUserSimilarity[" + user1 + ',' + user2 + ':' + value + ']';
+ return "UserUserSimilarity[" + userID1 + ',' + userID2 + ':' + value + ']';
}
/** Defines an ordering from highest similarity to lowest. */
@@ -171,12 +171,12 @@
return false;
}
UserUserSimilarity otherSimilarity = (UserUserSimilarity) other;
- return otherSimilarity.user1.equals(user1) && otherSimilarity.user2.equals(user2) && otherSimilarity.value == value;
+ return otherSimilarity.userID1.equals(userID1) && otherSimilarity.userID2.equals(userID2) && otherSimilarity.value == value;
}
@Override
public int hashCode() {
- return user1.hashCode() ^ user2.hashCode() ^ RandomUtils.hashDouble(value);
+ return userID1.hashCode() ^ userID2.hashCode() ^ RandomUtils.hashDouble(value);
}
}
@@ -184,18 +184,18 @@
private static final class DataModelSimilaritiesIterator implements Iterator<UserUserSimilarity> {
private final UserSimilarity otherSimilarity;
- private final List<? extends User> users;
+ private final List<? extends Comparable<?>> userIDs;
private final int size;
private int i;
- private User user1;
+ private Comparable<?> userID1;
private int j;
- private DataModelSimilaritiesIterator(UserSimilarity otherSimilarity, List<? extends User> users) {
+ private DataModelSimilaritiesIterator(UserSimilarity otherSimilarity, List<? extends Comparable<?>> userIDs) {
this.otherSimilarity = otherSimilarity;
- this.users = users;
- this.size = users.size();
+ this.userIDs = userIDs;
+ this.size = userIDs.size();
i = 0;
- user1 = users.get(0);
+ userID1 = userIDs.get(0);
j = 1;
}
@@ -209,19 +209,19 @@
if (!hasNext()) {
throw new NoSuchElementException();
}
- User user2 = users.get(j);
+ Comparable<?> userID2 = userIDs.get(j);
double similarity;
try {
- similarity = otherSimilarity.userSimilarity(user1, user2);
+ similarity = otherSimilarity.userSimilarity(userID1, userID2);
} catch (TasteException te) {
// ugly:
throw new RuntimeException(te);
}
- UserUserSimilarity result = new UserUserSimilarity(user1, user2, similarity);
+ UserUserSimilarity result = new UserUserSimilarity(userID1, userID2, similarity);
j++;
if (j == size) {
i++;
- user1 = users.get(i);
+ userID1 = userIDs.get(i);
j = i + 1;
}
return result;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java Tue Aug 4 00:06:46 2009
@@ -19,10 +19,9 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -47,57 +46,24 @@
}
@Override
- public double userSimilarity(User user1, User user2) throws TasteException {
- if (user1 == null || user2 == null) {
- throw new IllegalArgumentException("user1 or user2 is null");
- }
-
- Preference[] xPrefs = user1.getPreferencesAsArray();
- Preference[] yPrefs = user2.getPreferencesAsArray();
-
- if (xPrefs.length == 0 && yPrefs.length == 0) {
- return Double.NaN;
- }
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return 0.0;
- }
+ public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
- int intersectionSize = findIntersectionSize(xPrefs, yPrefs);
+ FastSet<Comparable<?>> prefs1 = dataModel.getItemIDsFromUser(userID1);
+ FastSet<Comparable<?>> prefs2 = dataModel.getItemIDsFromUser(userID2);
+ int prefs1Size = prefs1.size();
+ int prefs2Size = prefs2.size();
+ int intersectionSize = prefs1Size < prefs2Size ?
+ prefs2.intersectionSize(prefs1) :
+ prefs1.intersectionSize(prefs2);
int numItems = dataModel.getNumItems();
- double logLikelihood =
- twoLogLambda(intersectionSize, xPrefs.length - intersectionSize, yPrefs.length, numItems - yPrefs.length);
+ double logLikelihood = LogLikelihoodSimilarity.twoLogLambda(intersectionSize,
+ prefs1Size - intersectionSize,
+ prefs2Size,
+ numItems - prefs2Size);
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
- static int findIntersectionSize(Preference[] xPrefs, Preference[] yPrefs) {
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- int intersectionSize = 0;
- while (true) {
- int compare = ((Comparable<Object>) xPref.getItemID()).compareTo(yPref.getItemID());
- if (compare == 0) {
- intersectionSize++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- }
- }
- return intersectionSize;
- }
-
@Override
public double itemSimilarity(Comparable<?> itemID1, Comparable<?> itemID2) throws TasteException {
if (itemID1 == null || itemID2 == null) {
@@ -122,7 +88,7 @@
}
private static double safeLog(double d) {
- return d <= 0.0 ? 0 : Math.log(d);
+ return d <= 0.0 ? 0.0 : Math.log(d);
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Tue Aug 4 00:06:46 2009
@@ -20,10 +20,9 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
/**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values are calculated:</p>
+ * <p>An implementation of the Pearson correlation. For users X and Y, the following values are calculated:</p>
*
* <ul> <li>sumX2: sum of the square of all X's preference values</li> <li>sumY2: sum of the square of all Y's
* preference values</li> <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and