You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/04 02:06:50 UTC

svn commit: r800634 [4/7] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ core/src/main/java/org...

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Tue Aug  4 00:06:46 2009
@@ -27,8 +27,7 @@
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -45,17 +44,17 @@
 import java.util.concurrent.locks.ReentrantLock;
 
 /**
- * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link User}s, then determines the
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters users, then determines the
  * clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters until only a
  * certain number remain, meaning that each cluster is sort of a tree of other clusters.</p>
  *
  * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p>
  *
  * <ul>
- * <li>For all {@link User}s in a cluster, recommendations will be the same</li>
+ * <li>For all users in a cluster, recommendations will be the same</li>
  * <li>{@link #estimatePreference(Comparable, Comparable)} may well return {@link Double#NaN};
  *  it does so when asked to estimate preference for an item for
- *  which no preference is expressed in the {@link User}s in the cluster.</li>
+ *  which no preference is expressed in the users in the cluster.</li>
  * </ul>
  */
 public final class TreeClusteringRecommender extends AbstractRecommender implements ClusteringRecommender {
@@ -70,14 +69,14 @@
   private final boolean clusteringByThreshold;
   private final double samplingRate;
   private Map<Comparable<?>, List<RecommendedItem>> topRecsByUserID;
-  private Collection<Collection<User>> allClusters;
-  private Map<Comparable<?>, Collection<User>> clustersByUserID;
+  private Collection<Collection<Comparable<?>>> allClusters;
+  private Map<Comparable<?>, Collection<Comparable<?>>> clustersByUserID;
   private boolean clustersBuilt;
   private final ReentrantLock buildClustersLock;
   private final RefreshHelper refreshHelper;
 
   /**
-   * @param dataModel         {@link DataModel} which provdes {@link User}s
+   * @param dataModel         {@link DataModel} which provides users
    * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
    * @param numClusters       desired number of clusters to create
    * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
@@ -89,7 +88,7 @@
   }
 
   /**
-   * @param dataModel         {@link DataModel} which provdes {@link User}s
+   * @param dataModel         {@link DataModel} which provides users
    * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
    * @param numClusters       desired number of clusters to create
    * @param samplingRate      percentage of all cluster-cluster pairs to consider when finding next-most-similar
@@ -129,7 +128,7 @@
   }
 
   /**
-   * @param dataModel           {@link DataModel} which provdes {@link User}s
+   * @param dataModel           {@link DataModel} which provides users
    * @param clusterSimilarity   {@link ClusterSimilarity} used to compute cluster similarity
    * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
    *                            the next two nearest clusters' similarity drops below this threshold
@@ -143,7 +142,7 @@
   }
 
   /**
-   * @param dataModel           {@link DataModel} which provides {@link User}s
+   * @param dataModel           {@link DataModel} which provides users
    * @param clusterSimilarity   {@link ClusterSimilarity} used to compute cluster similarity
    * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
    *                            the next two nearest clusters' similarity drops below this threshold
@@ -203,7 +202,7 @@
       return Collections.emptyList();
     }
 
-    User theUser = getDataModel().getUser(userID);
+    DataModel dataModel = getDataModel();
     List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
     // Only add items the user doesn't already have a preference for.
     // And that the rescorer doesn't "reject".
@@ -212,7 +211,7 @@
       if (rescorer != null && rescorer.isFiltered(itemID)) {
         continue;
       }
-      if (theUser.getPreferenceFor(itemID) == null &&
+      if (dataModel.getPreferenceValue(userID, itemID) == null &&
           (rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
         rescored.add(recommendedItem);
       }
@@ -223,15 +222,14 @@
   }
 
   @Override
-  public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+  public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     if (userID == null || itemID == null) {
       throw new IllegalArgumentException("userID or itemID is null");
     }
     DataModel model = getDataModel();
-    User theUser = model.getUser(userID);
-    Preference actualPref = theUser.getPreferenceFor(itemID);
+    Float actualPref = model.getPreferenceValue(userID, itemID);
     if (actualPref != null) {
-      return actualPref.getValue();
+      return actualPref;
     }
     checkClustersBuilt();
     List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
@@ -243,21 +241,21 @@
       }
     }
     // Hmm, we have no idea. The item is not in the user's cluster
-    return Double.NaN;
+    return Float.NaN;
   }
 
   @Override
-  public Collection<User> getCluster(Comparable<?> userID) throws TasteException {
+  public Collection<Comparable<?>> getCluster(Comparable<?> userID) throws TasteException {
     if (userID == null) {
       throw new IllegalArgumentException("userID is null");
     }
     checkClustersBuilt();
-    Collection<User> cluster = clustersByUserID.get(userID);
-    return cluster == null ? Collections.<User>emptyList() : cluster;
+    Collection<Comparable<?>> cluster = clustersByUserID.get(userID);
+    return cluster == null ? Collections.<Comparable<?>>emptyList() : cluster;
   }
 
   @Override
-  public Collection<Collection<User>> getClusters() throws TasteException {
+  public Collection<Collection<Comparable<?>>> getClusters() throws TasteException {
     checkClustersBuilt();
     return allClusters;
   }
@@ -274,17 +272,14 @@
       DataModel model = getDataModel();
       int numUsers = model.getNumUsers();
       if (numUsers > 0) {
-        List<Collection<User>> newClusters = new ArrayList<Collection<User>>(numUsers);
-        if (numUsers == 1) {
-          User onlyUser = model.getUsers().iterator().next();
-          newClusters.add(Collections.singleton(onlyUser));
-        } else {
-          // Begin with a cluster for each user:
-          for (User user : model.getUsers()) {
-            Collection<User> newCluster = new FastSet<User>();
-            newCluster.add(user);
-            newClusters.add(newCluster);
-          }
+        List<Collection<Comparable<?>>> newClusters = new ArrayList<Collection<Comparable<?>>>(numUsers);
+        // Begin with a cluster for each user:
+        for (Comparable<?> userID : model.getUserIDs()) {
+          Collection<Comparable<?>> newCluster = new FastSet<Comparable<?>>();
+          newCluster.add(userID);
+          newClusters.add(newCluster);
+        }
+        if (numUsers > 1) {
           findClusters(newClusters);
         }
         topRecsByUserID = computeTopRecsPerUserID(newClusters);
@@ -301,16 +296,16 @@
     }
   }
 
-  private void findClusters(List<Collection<User>> newClusters) throws TasteException {
+  private void findClusters(List<Collection<Comparable<?>>> newClusters) throws TasteException {
     if (clusteringByThreshold) {
-      Pair<Collection<User>, Collection<User>> nearestPair = findNearestClusters(newClusters);
+      Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair = findNearestClusters(newClusters);
       if (nearestPair != null) {
-        Collection<User> cluster1 = nearestPair.getFirst();
-        Collection<User> cluster2 = nearestPair.getSecond();
+        Collection<Comparable<?>> cluster1 = nearestPair.getFirst();
+        Collection<Comparable<?>> cluster2 = nearestPair.getSecond();
         while (clusterSimilarity.getSimilarity(cluster1, cluster2) >= clusteringThreshold) {
           newClusters.remove(cluster1);
           newClusters.remove(cluster2);
-          Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+          Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
           merged.addAll(cluster1);
           merged.addAll(cluster2);
           newClusters.add(merged);
@@ -324,16 +319,16 @@
       }
     } else {
       while (newClusters.size() > numClusters) {
-        Pair<Collection<User>, Collection<User>> nearestPair =
+        Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair =
             findNearestClusters(newClusters);
         if (nearestPair == null) {
           break;
         }
-        Collection<User> cluster1 = nearestPair.getFirst();
-        Collection<User> cluster2 = nearestPair.getSecond();
+        Collection<Comparable<?>> cluster1 = nearestPair.getFirst();
+        Collection<Comparable<?>> cluster2 = nearestPair.getSecond();
         newClusters.remove(cluster1);
         newClusters.remove(cluster2);
-        Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+        Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
         merged.addAll(cluster1);
         merged.addAll(cluster2);
         newClusters.add(merged);
@@ -341,20 +336,20 @@
     }
   }
 
-  private Pair<Collection<User>, Collection<User>> findNearestClusters(List<Collection<User>> clusters)
-      throws TasteException {
+  private Pair<Collection<Comparable<?>>, Collection<Comparable<?>>>
+      findNearestClusters(List<Collection<Comparable<?>>> clusters) throws TasteException {
     int size = clusters.size();
-    Pair<Collection<User>, Collection<User>> nearestPair = null;
+    Pair<Collection<Comparable<?>>, Collection<Comparable<?>>> nearestPair = null;
     double bestSimilarity = Double.NEGATIVE_INFINITY;
     for (int i = 0; i < size; i++) {
-      Collection<User> cluster1 = clusters.get(i);
+      Collection<Comparable<?>> cluster1 = clusters.get(i);
       for (int j = i + 1; j < size; j++) {
         if (samplingRate >= 1.0 || r.nextDouble() < samplingRate) {
-          Collection<User> cluster2 = clusters.get(j);
+          Collection<Comparable<?>> cluster2 = clusters.get(j);
           double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
           if (!Double.isNaN(similarity) && similarity > bestSimilarity) {
             bestSimilarity = similarity;
-            nearestPair = new Pair<Collection<User>, Collection<User>>(cluster1, cluster2);
+            nearestPair = new Pair<Collection<Comparable<?>>, Collection<Comparable<?>>>(cluster1, cluster2);
           }
         }
       }
@@ -362,26 +357,27 @@
     return nearestPair;
   }
 
-  private static Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(
-      Iterable<Collection<User>> clusters) throws TasteException {
+  private Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(
+      Iterable<Collection<Comparable<?>>> clusters) throws TasteException {
     Map<Comparable<?>, List<RecommendedItem>> recsPerUser = new FastMap<Comparable<?>, List<RecommendedItem>>();
-    for (Collection<User> cluster : clusters) {
+    for (Collection<Comparable<?>> cluster : clusters) {
       List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
-      for (User user : cluster) {
-        recsPerUser.put(user.getID(), recs);
+      for (Comparable<?> userID : cluster) {
+        recsPerUser.put(userID, recs);
       }
     }
     return Collections.unmodifiableMap(recsPerUser);
   }
 
-  private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+  private List<RecommendedItem> computeTopRecsForCluster(Collection<Comparable<?>> cluster)
       throws TasteException {
-
+    DataModel dataModel = getDataModel();
     Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
-    for (User user : cluster) {
-      Preference[] prefs = user.getPreferencesAsArray();
-      for (Preference pref : prefs) {
-        allItemIDs.add(pref.getItemID());
+    for (Comparable<?> userID : cluster) {
+      PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+      int size = prefs.length();
+      for (int i = 0; i < size; i++) {
+        allItemIDs.add(prefs.getItemID(i));
       }
     }
 
@@ -394,12 +390,13 @@
     return Collections.unmodifiableList(topItems);
   }
 
-  private static Map<Comparable<?>, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
-    Map<Comparable<?>, Collection<User>> clustersPerUser =
-            new FastMap<Comparable<?>, Collection<User>>(clusters.size());
-    for (Collection<User> cluster : clusters) {
-      for (User user : cluster) {
-        clustersPerUser.put(user.getID(), cluster);
+  private static Map<Comparable<?>, Collection<Comparable<?>>>
+      computeClustersPerUserID(Collection<Collection<Comparable<?>>> clusters) {
+    Map<Comparable<?>, Collection<Comparable<?>>> clustersPerUser =
+            new FastMap<Comparable<?>, Collection<Comparable<?>>>(clusters.size());
+    for (Collection<Comparable<?>> cluster : clusters) {
+      for (Comparable<?> userID : cluster) {
+        clustersPerUser.put(userID, cluster);
       }
     }
     return clustersPerUser;
@@ -415,21 +412,22 @@
     return "TreeClusteringRecommender[clusterSimilarity:" + clusterSimilarity + ']';
   }
 
-  private static class Estimator implements TopItems.Estimator<Comparable<?>> {
+  private class Estimator implements TopItems.Estimator<Comparable<?>> {
 
-    private final Collection<User> cluster;
+    private final Collection<Comparable<?>> cluster;
 
-    private Estimator(Collection<User> cluster) {
+    private Estimator(Collection<Comparable<?>> cluster) {
       this.cluster = cluster;
     }
 
     @Override
-    public double estimate(Comparable<?> itemID) {
+    public double estimate(Comparable<?> itemID) throws TasteException {
+      DataModel dataModel = getDataModel();
       RunningAverage average = new FullRunningAverage();
-      for (User user : cluster) {
-        Preference pref = user.getPreferenceFor(itemID);
+      for (Comparable<?> userID : cluster) {
+        Float pref = dataModel.getPreferenceValue(userID, itemID);
         if (pref != null) {
-          average.addDatum(pref.getValue());
+          average.addDatum(pref);
         }
       }
       return average.getAverage();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Tue Aug  4 00:06:46 2009
@@ -26,8 +26,7 @@
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -46,19 +45,19 @@
 import java.util.concurrent.locks.ReentrantLock;
 
 /**
- * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link
- * org.apache.mahout.cf.taste.model.User}s, then determines the clusters' top recommendations. This implementation
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters users,
+ * then determines the clusters' top recommendations. This implementation
  * builds clusters by repeatedly merging clusters until only a certain number remain, meaning that each cluster is sort
  * of a tree of other clusters.</p>
  *
  * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p> <ul>
- * <li>For all {@link org.apache.mahout.cf.taste.model.User}s in a cluster, recommendations will be the same</li>
+ * <li>For all users in a cluster, recommendations will be the same</li>
  * <li>{@link #estimatePreference(Comparable, Comparable)} may well return {@link Double#NaN}; it does so
- * when asked to estimate preference for an item for which no preference is expressed in the {@link
- * org.apache.mahout.cf.taste.model.User}s in the cluster.</li> </ul>
+ * when asked to estimate preference for an item for which no preference is expressed in the
+ * users in the cluster.</li> </ul>
  *
  * <p>This is an <em>experimental</em> implementation which tries to gain a lot of speed at the cost of accuracy in
- * building clusters, compared to {@link org.apache.mahout.cf.taste.impl.recommender.TreeClusteringRecommender}. It will
+ * building clusters, compared to {@link TreeClusteringRecommender}. It will
  * sometimes cluster two other clusters together that may not be the exact closest two clusters in existence. This may
  * not affect the recommendation quality much, but it potentially speeds up the clustering process dramatically.</p>
  */
@@ -71,16 +70,15 @@
   private final double clusteringThreshold;
   private final boolean clusteringByThreshold;
   private Map<Comparable<?>, List<RecommendedItem>> topRecsByUserID;
-  private Collection<Collection<User>> allClusters;
-  private Map<Comparable<?>, Collection<User>> clustersByUserID;
+  private Collection<Collection<Comparable<?>>> allClusters;
+  private Map<Comparable<?>, Collection<Comparable<?>>> clustersByUserID;
   private boolean clustersBuilt;
   private final ReentrantLock buildClustersLock;
   private final RefreshHelper refreshHelper;
 
   /**
-   * @param dataModel         {@link org.apache.mahout.cf.taste.model.DataModel} which provdes {@link
-   *                          org.apache.mahout.cf.taste.model.User}s
-   * @param clusterSimilarity {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
+   * @param dataModel         {@link DataModel} which provides users
+   * @param clusterSimilarity {@link ClusterSimilarity} used to compute
    *                          cluster similarity
    * @param numClusters       desired number of clusters to create
    * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
@@ -112,8 +110,7 @@
   }
 
   /**
-   * @param dataModel           {@link org.apache.mahout.cf.taste.model.DataModel} which provdes {@link
-   *                            org.apache.mahout.cf.taste.model.User}s
+   * @param dataModel           {@link org.apache.mahout.cf.taste.model.DataModel} which provides users
    * @param clusterSimilarity   {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute
    *                            cluster similarity
    * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger clusters until
@@ -165,7 +162,7 @@
       return Collections.emptyList();
     }
 
-    User theUser = getDataModel().getUser(userID);
+    DataModel dataModel = getDataModel();
     List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
     // Only add items the user doesn't already have a preference for.
     // And that the rescorer doesn't "reject".
@@ -174,7 +171,7 @@
       if (rescorer != null && rescorer.isFiltered(itemID)) {
         continue;
       }
-      if (theUser.getPreferenceFor(itemID) == null &&
+      if (dataModel.getPreferenceValue(userID, itemID) == null &&
           (rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
         rescored.add(recommendedItem);
       }
@@ -185,15 +182,14 @@
   }
 
   @Override
-  public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+  public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     if (userID == null || itemID == null) {
       throw new IllegalArgumentException("userID or itemID is null");
     }
     DataModel model = getDataModel();
-    User theUser = model.getUser(userID);
-    Preference actualPref = theUser.getPreferenceFor(itemID);
+    Float actualPref = getDataModel().getPreferenceValue(userID, itemID);
     if (actualPref != null) {
-      return actualPref.getValue();
+      return actualPref;
     }
     checkClustersBuilt();
     List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
@@ -205,21 +201,21 @@
       }
     }
     // Hmm, we have no idea. The item is not in the user's cluster
-    return Double.NaN;
+    return Float.NaN;
   }
 
   @Override
-  public Collection<User> getCluster(Comparable<?> userID) throws TasteException {
+  public Collection<Comparable<?>> getCluster(Comparable<?> userID) throws TasteException {
     if (userID == null) {
       throw new IllegalArgumentException("userID is null");
     }
     checkClustersBuilt();
-    Collection<User> cluster = clustersByUserID.get(userID);
-    return cluster == null ? Collections.<User>emptyList() : cluster;
+    Collection<Comparable<?>> cluster = clustersByUserID.get(userID);
+    return cluster == null ? Collections.<Comparable<?>>emptyList() : cluster;
   }
 
   @Override
-  public Collection<Collection<User>> getClusters() throws TasteException {
+  public Collection<Collection<Comparable<?>>> getClusters() throws TasteException {
     checkClustersBuilt();
     return allClusters;
   }
@@ -232,23 +228,23 @@
 
   private static final class ClusterClusterPair implements Comparable<ClusterClusterPair> {
 
-    private final Collection<User> cluster1;
-    private final Collection<User> cluster2;
+    private final Collection<Comparable<?>> cluster1;
+    private final Collection<Comparable<?>> cluster2;
     private final double similarity;
 
-    private ClusterClusterPair(Collection<User> cluster1,
-                               Collection<User> cluster2,
+    private ClusterClusterPair(Collection<Comparable<?>> cluster1,
+                               Collection<Comparable<?>> cluster2,
                                double similarity) {
       this.cluster1 = cluster1;
       this.cluster2 = cluster2;
       this.similarity = similarity;
     }
 
-    private Collection<User> getCluster1() {
+    private Collection<Comparable<?>> getCluster1() {
       return cluster1;
     }
 
-    private Collection<User> getCluster2() {
+    private Collection<Comparable<?>> getCluster2() {
       return cluster2;
     }
 
@@ -299,11 +295,11 @@
 
       } else {
 
-        List<Collection<User>> clusters = new LinkedList<Collection<User>>();
+        List<Collection<Comparable<?>>> clusters = new LinkedList<Collection<Comparable<?>>>();
         // Begin with a cluster for each user:
-        for (User user : model.getUsers()) {
-          Collection<User> newCluster = new FastSet<User>();
-          newCluster.add(user);
+        for (Comparable<?> userID : model.getUserIDs()) {
+          Collection<Comparable<?>> newCluster = new FastSet<Comparable<?>>();
+          newCluster.add(userID);
           clusters.add(newCluster);
         }
 
@@ -324,7 +320,7 @@
     }
   }
 
-  private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done)
+  private boolean mergeClosestClusters(int numUsers, List<Collection<Comparable<?>>> clusters, boolean done)
       throws TasteException {
     // We find a certain number of closest clusters...
     LinkedList<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);
@@ -348,15 +344,15 @@
         break;
       }
 
-      Collection<User> cluster1 = top.getCluster1();
-      Collection<User> cluster2 = top.getCluster2();
+      Collection<Comparable<?>> cluster1 = top.getCluster1();
+      Collection<Comparable<?>> cluster2 = top.getCluster2();
 
       // Pull out current two clusters from clusters
-      Iterator<Collection<User>> clusterIterator = clusters.iterator();
+      Iterator<Collection<Comparable<?>>> clusterIterator = clusters.iterator();
       boolean removed1 = false;
       boolean removed2 = false;
       while (clusterIterator.hasNext() && !(removed1 && removed2)) {
-        Collection<User> current = clusterIterator.next();
+        Collection<Comparable<?>> current = clusterIterator.next();
         // Yes, use == here
         if (!removed1 && cluster1 == current) {
           clusterIterator.remove();
@@ -372,22 +368,22 @@
       for (Iterator<ClusterClusterPair> queueIterator = queue.iterator();
            queueIterator.hasNext();) {
         ClusterClusterPair pair = queueIterator.next();
-        Collection<User> pair1 = pair.getCluster1();
-        Collection<User> pair2 = pair.getCluster2();
+        Collection<Comparable<?>> pair1 = pair.getCluster1();
+        Collection<Comparable<?>> pair2 = pair.getCluster2();
         if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2) {
           queueIterator.remove();
         }
       }
 
       // Make new merged cluster
-      Collection<User> merged = new FastSet<User>(cluster1.size() + cluster2.size());
+      Collection<Comparable<?>> merged = new FastSet<Comparable<?>>(cluster1.size() + cluster2.size());
       merged.addAll(cluster1);
       merged.addAll(cluster2);
 
       // Compare against other clusters; update queue if needed
       // That new pair we're just adding might be pretty close to something else, so
       // catch that case here and put it back into our queue
-      for (Collection<User> cluster : clusters) {
+      for (Collection<Comparable<?>> cluster : clusters) {
         double similarity = clusterSimilarity.getSimilarity(merged, cluster);
         if (similarity > queue.getLast().getSimilarity()) {
           ListIterator<ClusterClusterPair> queueIterator = queue.listIterator();
@@ -408,16 +404,16 @@
     return done;
   }
 
-  private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<User>> clusters)
+  private LinkedList<ClusterClusterPair> findClosestClusters(int numUsers, List<Collection<Comparable<?>>> clusters)
       throws TasteException {
     boolean full = false;
     LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
     int i = 0;
-    for (Collection<User> cluster1 : clusters) {
+    for (Collection<Comparable<?>> cluster1 : clusters) {
       i++;
-      ListIterator<Collection<User>> it2 = clusters.listIterator(i);
+      ListIterator<Collection<Comparable<?>>> it2 = clusters.listIterator(i);
       while (it2.hasNext()) {
-        Collection<User> cluster2 = it2.next();
+        Collection<Comparable<?>> cluster2 = it2.next();
         double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
         if (!Double.isNaN(similarity) &&
             (!full || similarity > queue.getLast().getSimilarity())) {
@@ -442,26 +438,28 @@
     return queue;
   }
 
-  private static Map<Comparable<?>, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)
-      throws TasteException {
+  private Map<Comparable<?>, List<RecommendedItem>>
+      computeTopRecsPerUserID(Iterable<Collection<Comparable<?>>> clusters) throws TasteException {
     Map<Comparable<?>, List<RecommendedItem>> recsPerUser = new FastMap<Comparable<?>, List<RecommendedItem>>();
-    for (Collection<User> cluster : clusters) {
+    for (Collection<Comparable<?>> cluster : clusters) {
       List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
-      for (User user : cluster) {
-        recsPerUser.put(user.getID(), recs);
+      for (Comparable<?> userID : cluster) {
+        recsPerUser.put(userID, recs);
       }
     }
     return Collections.unmodifiableMap(recsPerUser);
   }
 
-  private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+  private List<RecommendedItem> computeTopRecsForCluster(Collection<Comparable<?>> cluster)
       throws TasteException {
 
+    DataModel dataModel = getDataModel();
     Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
-    for (User user : cluster) {
-      Preference[] prefs = user.getPreferencesAsArray();
-      for (Preference pref : prefs) {
-        allItemIDs.add(pref.getItemID());
+    for (Comparable<?> userID : cluster) {
+      PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+      int size = prefs.length();
+      for (int i = 0; i < size; i++) {
+        allItemIDs.add(prefs.getItemID(i));
       }
     }
 
@@ -474,11 +472,13 @@
     return Collections.unmodifiableList(topItems);
   }
 
-  private static Map<Comparable<?>, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
-    Map<Comparable<?>, Collection<User>> clustersPerUser = new FastMap<Comparable<?>, Collection<User>>(clusters.size());
-    for (Collection<User> cluster : clusters) {
-      for (User user : cluster) {
-        clustersPerUser.put(user.getID(), cluster);
+  private static Map<Comparable<?>, Collection<Comparable<?>>>
+      computeClustersPerUserID(Collection<Collection<Comparable<?>>> clusters) {
+    Map<Comparable<?>, Collection<Comparable<?>>> clustersPerUser =
+            new FastMap<Comparable<?>, Collection<Comparable<?>>>(clusters.size());
+    for (Collection<Comparable<?>> cluster : clusters) {
+      for (Comparable<?> userID : cluster) {
+        clustersPerUser.put(userID, cluster);
       }
     }
     return clustersPerUser;
@@ -494,21 +494,22 @@
     return "TreeClusteringRecommender2[clusterSimilarity:" + clusterSimilarity + ']';
   }
 
-  private static class Estimator implements TopItems.Estimator<Comparable<?>> {
+  private class Estimator implements TopItems.Estimator<Comparable<?>> {
 
-    private final Collection<User> cluster;
+    private final Collection<Comparable<?>> cluster;
 
-    private Estimator(Collection<User> cluster) {
+    private Estimator(Collection<Comparable<?>> cluster) {
       this.cluster = cluster;
     }
 
     @Override
-    public double estimate(Comparable<?> itemID) {
+    public double estimate(Comparable<?> itemID) throws TasteException {
+      DataModel dataModel = getDataModel();
       RunningAverage average = new FullRunningAverage();
-      for (User user : cluster) {
-        Preference pref = user.getPreferenceFor(itemID);
+      for (Comparable<?> userID : cluster) {
+        Float pref = dataModel.getPreferenceValue(userID, itemID);
         if (pref != null) {
-          average.addDatum(pref.getValue());
+          average.addDatum(pref);
         }
       }
       return average.getAverage();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java Tue Aug  4 00:06:46 2009
@@ -23,8 +23,7 @@
 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
@@ -61,27 +60,31 @@
   }
 
 
-  private double[] getInterpolations(Comparable<?> itemID, User theUser, List<Comparable<?>> itemNeighborhood) throws TasteException {
+  private double[] getInterpolations(Comparable<?> itemID, Comparable<?> userID, List<Comparable<?>> itemNeighborhood)
+          throws TasteException {
 
     int k = itemNeighborhood.size();
     double[][] A = new double[k][k];
     double[] b = new double[k];
     int i = 0;
 
+    DataModel dataModel = getDataModel();
+
     int numUsers = getDataModel().getNumUsers();
     for (Comparable<?> iitem : itemNeighborhood) {
-      Preference[] iPrefs = getDataModel().getPreferencesForItemAsArray(iitem);
+      PreferenceArray iPrefs = getDataModel().getPreferencesForItem(iitem);
+      int iSize = iPrefs.length();
       int j = 0;
       for (Comparable<?> jitem : itemNeighborhood) {
         double value = 0.0;
-        for (Preference pi : iPrefs) {
-          User v = pi.getUser();
-          if (v.equals(theUser)) {
+        for (int pi = 0; pi < iSize; pi++) {
+          Comparable<?> v = iPrefs.getUserID(pi);
+          if (v.equals(userID)) {
             continue;
           }
-          Preference pj = v.getPreferenceFor(jitem);
+          Float pj = dataModel.getPreferenceValue(userID, jitem);
           if (pj != null) {
-            value += pi.getValue() * pj.getValue();
+            value += iPrefs.getValue(pi) * pj;
           }
         }
         A[i][j] = value / numUsers;
@@ -90,18 +93,19 @@
       i++;
     }
 
-    Preference[] iPrefs = getDataModel().getPreferencesForItemAsArray(itemID);
+    PreferenceArray iPrefs = getDataModel().getPreferencesForItem(itemID);
+    int iSize = iPrefs.length();
     i = 0;
     for (Comparable<?> jitem : itemNeighborhood) {
       double value = 0.0;
-      for (Preference pi : iPrefs) {
-        User v = pi.getUser();
-        if (v.equals(theUser)) {
+      for (int pi = 0; pi < iSize; pi++) {
+        Comparable<?> v = iPrefs.getUserID(pi);
+        if (v.equals(userID)) {
           continue;
         }
-        Preference pj = v.getPreferenceFor(jitem);
+        Float pj = dataModel.getPreferenceValue(userID, jitem);
         if (pj != null) {
-          value += pi.getValue() * pj.getValue();
+          value += iPrefs.getValue(pi) * pj;
         }
       }
       b[i] = value / numUsers;
@@ -112,11 +116,14 @@
   }
 
   @Override
-  protected double doEstimatePreference(User theUser, Comparable<?> itemID) throws TasteException {
+  protected float doEstimatePreference(Comparable<?> theUserID, Comparable<?> itemID) throws TasteException {
 
+    DataModel dataModel = getDataModel();
     Collection<Comparable<?>> allItemIDs = new FastSet<Comparable<?>>();
-    for (Preference pref : theUser.getPreferencesAsArray()) {
-      allItemIDs.add(pref.getItemID());
+    PreferenceArray prefs = dataModel.getPreferencesFromUser(theUserID);
+    int size = prefs.length();
+    for (int i = 0; i < size; i++) {
+      allItemIDs.add(prefs.getItemID(i));
     }
     allItemIDs.remove(itemID);
 
@@ -127,23 +134,23 @@
     }
 
 
-    double[] weights = getInterpolations(itemID, theUser, theNeighborhood);
+    double[] weights = getInterpolations(itemID, theUserID, theNeighborhood);
 
     int i = 0;
     double preference = 0.0;
     double totalSimilarity = 0.0;
     for (Comparable<?> jitem : theNeighborhood) {
 
-      Preference pref = theUser.getPreferenceFor(jitem);
+      Float pref = dataModel.getPreferenceValue(theUserID, jitem);
 
       if (pref != null) {
-        preference += pref.getValue() * weights[i];
+        preference += pref * weights[i];
         totalSimilarity += weights[i];
       }
       i++;
 
     }
-    return totalSimilarity == 0.0 ? Double.NaN : preference / totalSimilarity;
+    return totalSimilarity == 0.0 ? Float.NaN : (float) (preference / totalSimilarity);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java Tue Aug  4 00:06:46 2009
@@ -30,8 +30,7 @@
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -144,14 +143,14 @@
   }
 
   @Override
-  public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, Preference[] prefs)
+  public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, PreferenceArray prefs)
           throws TasteException {
     try {
       buildAverageDiffsLock.readLock().lock();
-      int size = prefs.length;
+      int size = prefs.length();
       RunningAverage[] result = new RunningAverage[size];
       for (int i = 0; i < size; i++) {
-        result[i] = getDiff(prefs[i].getItemID(), itemID);
+        result[i] = getDiff(prefs.getItemID(i), itemID);
       }
       return result;
     } finally {
@@ -165,7 +164,7 @@
   }
 
   @Override
-  public void updateItemPref(Comparable<?> itemID, double prefDelta, boolean remove) {
+  public void updateItemPref(Comparable<?> itemID, float prefDelta, boolean remove) {
     if (!remove && stdDevWeighted) {
       throw new UnsupportedOperationException("Can't update only when stdDevWeighted is set");
     }
@@ -201,11 +200,10 @@
 
   @Override
   public Set<Comparable<?>> getRecommendableItemIDs(Comparable<?> userID) throws TasteException {
-    User user = dataModel.getUser(userID);
     Set<Comparable<?>> result = allRecommendableItemIDs.clone();
     Iterator<Comparable<?>> it = result.iterator();
     while (it.hasNext()) {
-      if (user.getPreferenceFor(it.next()) != null) {
+      if (dataModel.getPreferenceValue(userID, it.next()) != null) {
         it.remove();
       }
     }
@@ -218,8 +216,8 @@
       buildAverageDiffsLock.writeLock().lock();
       averageDiffs.clear();
       long averageCount = 0L;
-      for (User user : dataModel.getUsers()) {
-        averageCount = processOneUser(averageCount, user);
+      for (Comparable<?> userID : dataModel.getUserIDs()) {
+        averageCount = processOneUser(averageCount, userID);
       }
 
       pruneInconsequentialDiffs();
@@ -263,15 +261,14 @@
     allRecommendableItemIDs.rehash();
   }
 
-  private long processOneUser(long averageCount, User user) {
-    log.debug("Processing prefs for user {}", user);
+  private long processOneUser(long averageCount, Comparable<?> userID) throws TasteException {
+    log.debug("Processing prefs for user {}", userID);
     // Save off prefs for the life of this loop iteration
-    Preference[] userPreferences = user.getPreferencesAsArray();
-    int length = userPreferences.length;
+    PreferenceArray userPreferences = dataModel.getPreferencesFromUser(userID);
+    int length = userPreferences.length();
     for (int i = 0; i < length; i++) {
-      Preference prefA = userPreferences[i];
-      double prefAValue = prefA.getValue();
-      Comparable<?> itemIDA = prefA.getItemID();
+      double prefAValue = userPreferences.getValue(i);
+      Comparable<?> itemIDA = userPreferences.getItemID(i);
       FastMap<Comparable<?>, RunningAverage> aMap = averageDiffs.get(itemIDA);
       if (aMap == null) {
         aMap = new FastMap<Comparable<?>, RunningAverage>();
@@ -279,8 +276,7 @@
       }
       for (int j = i + 1; j < length; j++) {
         // This is a performance-critical block
-        Preference prefB = userPreferences[j];
-        Comparable<?> itemIDB = prefB.getItemID();
+        Comparable<?> itemIDB = userPreferences.getItemID(j);
         RunningAverage average = aMap.get(itemIDB);
         if (average == null && averageCount < maxEntries) {
           average = buildRunningAverage();
@@ -288,7 +284,7 @@
           averageCount++;
         }
         if (average != null) {
-          average.addDatum(prefB.getValue() - prefAValue);
+          average.addDatum(userPreferences.getValue(j) - prefAValue);
         }
 
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java Tue Aug  4 00:06:46 2009
@@ -27,8 +27,7 @@
 import org.apache.mahout.cf.taste.impl.recommender.AbstractRecommender;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
 import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
@@ -107,10 +106,9 @@
 
     log.debug("Recommending items for user ID '{}'", userID);
 
-    User theUser = getDataModel().getUser(userID);
     Set<Comparable<?>> allItemIDs = diffStorage.getRecommendableItemIDs(userID);
 
-    TopItems.Estimator<Comparable<?>> estimator = new Estimator(theUser);
+    TopItems.Estimator<Comparable<?>> estimator = new Estimator(userID);
 
     List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItemIDs, rescorer, estimator);
 
@@ -119,25 +117,24 @@
   }
 
   @Override
-  public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+  public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     DataModel model = getDataModel();
-    User theUser = model.getUser(userID);
-    Preference actualPref = theUser.getPreferenceFor(itemID);
+    Float actualPref = model.getPreferenceValue(userID, itemID);
     if (actualPref != null) {
-      return actualPref.getValue();
+      return actualPref;
     }
-    return doEstimatePreference(theUser, itemID);
+    return doEstimatePreference(userID, itemID);
   }
 
-  private double doEstimatePreference(User theUser, Comparable<?> itemID) throws TasteException {
+  private float doEstimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     double count = 0.0;
     double totalPreference = 0.0;
-    Preference[] prefs = theUser.getPreferencesAsArray();
-    RunningAverage[] averages = diffStorage.getDiffs(theUser.getID(), itemID, prefs);
-    for (int i = 0; i < prefs.length; i++) {
+    PreferenceArray prefs = getDataModel().getPreferencesFromUser(userID);
+    RunningAverage[] averages = diffStorage.getDiffs(userID, itemID, prefs);
+    int size = prefs.length();
+    for (int i = 0; i < size; i++) {
       RunningAverage averageDiff = averages[i];
       if (averageDiff != null) {
-        Preference pref = prefs[i];
         double averageDiffValue = averageDiff.getAverage();
         if (weighted) {
           double weight = (double) averageDiff.getCount();
@@ -152,30 +149,29 @@
             // and disqualify this pref entirely
             // (Thanks Daemmon)
           }
-          totalPreference += weight * (pref.getValue() + averageDiffValue);
+          totalPreference += weight * (prefs.getValue(i) + averageDiffValue);
           count += weight;
         } else {
-          totalPreference += pref.getValue() + averageDiffValue;
+          totalPreference += prefs.getValue(i) + averageDiffValue;
           count += 1.0;
         }
       }
     }
     if (count <= 0.0) {
       RunningAverage itemAverage = diffStorage.getAverageItemPref(itemID);
-      return itemAverage == null ? Double.NaN : itemAverage.getAverage();
+      return itemAverage == null ? Float.NaN : (float) itemAverage.getAverage();
     } else {
-      return totalPreference / count;
+      return (float) (totalPreference / count);
     }
   }
 
   @Override
-  public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value) throws TasteException {
+  public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value) throws TasteException {
     DataModel dataModel = getDataModel();
-    double prefDelta;
+    float prefDelta;
     try {
-      User theUser = dataModel.getUser(userID);
-      Preference oldPref = theUser.getPreferenceFor(itemID);
-      prefDelta = oldPref == null ? value : value - oldPref.getValue();
+      Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+      prefDelta = oldPref == null ? value : value - oldPref;
     } catch (NoSuchUserException nsee) {
       prefDelta = value;
     }
@@ -186,11 +182,10 @@
   @Override
   public void removePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     DataModel dataModel = getDataModel();
-    User theUser = dataModel.getUser(userID);
-    Preference oldPref = theUser.getPreferenceFor(itemID);
+    Float oldPref = dataModel.getPreferenceValue(userID, itemID);
     super.removePreference(userID, itemID);
     if (oldPref != null) {
-      diffStorage.updateItemPref(itemID, oldPref.getValue(), true);
+      diffStorage.updateItemPref(itemID, oldPref, true);
     }
   }
 
@@ -208,15 +203,15 @@
 
   private final class Estimator implements TopItems.Estimator<Comparable<?>> {
 
-    private final User theUser;
+    private final Comparable<?> userID;
 
-    private Estimator(User theUser) {
-      this.theUser = theUser;
+    private Estimator(Comparable<?> userID) {
+      this.userID = userID;
     }
 
     @Override
     public double estimate(Comparable<?> itemID) throws TasteException {
-      return doEstimatePreference(theUser, itemID);
+      return doEstimatePreference(userID, itemID);
     }
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java Tue Aug  4 00:06:46 2009
@@ -25,7 +25,7 @@
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.impl.common.jdbc.AbstractJDBCComponent;
 import org.apache.mahout.cf.taste.model.JDBCDataModel;
-import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -148,9 +148,9 @@
   }
 
   @Override
-  public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, Preference[] prefs)
+  public RunningAverage[] getDiffs(Comparable<?> userID, Comparable<?> itemID, PreferenceArray prefs)
       throws TasteException {
-    int size = prefs.length;
+    int size = prefs.length();
     RunningAverage[] result = new RunningAverage[size];
     Connection conn = null;
     PreparedStatement stmt = null;
@@ -170,7 +170,7 @@
       int i = 0;
       while (rs.next()) {
         Comparable<?> nextResultItemID = (Comparable<?>) rs.getObject(3);
-        while (!prefs[i].getItemID().equals(nextResultItemID)) {
+        while (!prefs.getItemID(i).equals(nextResultItemID)) {
           i++;
           // result[i] is null for these values of i
         }
@@ -215,7 +215,7 @@
   }
 
   @Override
-  public void updateItemPref(Comparable<?> itemID, double prefDelta, boolean remove)
+  public void updateItemPref(Comparable<?> itemID, float prefDelta, boolean remove)
       throws TasteException {
     Connection conn = null;
     try {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Tue Aug  4 00:06:46 2009
@@ -31,7 +31,7 @@
  * <tr><td>234</td><td>789</td><td>2.1</td><td>1</td></tr> </table>
  *
  * <p><code>item_id_a</code> and <code>item_id_b</code> may have types compatible with Java String, or integer
- * primitive types. <code>average_diff</code> must be compatible with <code>double</code> and
+ * primitive types. <code>average_diff</code> must be compatible with <code>float</code> and
  * <code>count</code> must be compatible with <code>int</code>.</p>
  *
  * <p>The following command sets up a suitable table in MySQL:</p>

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Tue Aug  4 00:06:46 2009
@@ -30,7 +30,6 @@
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
@@ -84,8 +83,8 @@
     userMap = new FastMap<Comparable<?>, Integer>(numUsers);
 
     int idx = 0;
-    for (User user : dataModel.getUsers()) {
-      userMap.put(user.getID(), idx++);
+    for (Comparable<?> userID : dataModel.getUserIDs()) {
+      userMap.put(userID, idx++);
     }
 
     int numItems = dataModel.getNumItems();
@@ -117,8 +116,9 @@
 
   private void recachePreferences() throws TasteException {
     cachedPreferences.clear();
-    for (User user : getDataModel().getUsers()) {
-      for (Preference pref : user.getPreferences()) {
+    DataModel dataModel = getDataModel();
+    for (Comparable<?> userID : dataModel.getUserIDs()) {
+      for  (Preference pref : dataModel.getPreferencesFromUser(userID)) {
         cachedPreferences.add(pref);
       }
     }
@@ -126,8 +126,9 @@
 
   private double getAveragePreference() throws TasteException {
     RunningAverage average = new FullRunningAverage();
-    for (User user : getDataModel().getUsers()) {
-      for (Preference pref : user.getPreferences()) {
+    DataModel dataModel = getDataModel();
+    for (Comparable<?> userID : dataModel.getUserIDs()) {
+      for (Preference pref : dataModel.getPreferencesFromUser(userID)) {
         average.addDatum(pref.getValue());
       }
     }
@@ -144,20 +145,20 @@
     Collections.shuffle(cachedPreferences, random);
     for (int i = 0; i < numFeatures; i++) {
       for (Preference pref : cachedPreferences) {
-        int useridx = userMap.get(pref.getUser().getID());
+        int useridx = userMap.get(pref.getUserID());
         int itemidx = itemMap.get(pref.getItemID());
         emSvd.train(useridx, itemidx, i, pref.getValue());
       }
     }
   }
 
-  private double predictRating(int user, int item) {
-    return emSvd.getDotProduct(user, item);
+  private float predictRating(int user, int item) {
+    return (float) emSvd.getDotProduct(user, item);
   }
 
 
   @Override
-  public double estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+  public float estimatePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     Integer useridx = userMap.get(userID);
     if (useridx == null) {
       throw new NoSuchUserException();
@@ -182,11 +183,9 @@
 
     log.debug("Recommending items for user ID '{}'", userID);
 
-    User theUser = getDataModel().getUser(userID);
+    Set<Comparable<?>> allItemIDs = getAllOtherItems(userID);
 
-    Set<Comparable<?>> allItemIDs = getAllOtherItems(theUser);
-
-    TopItems.Estimator<Comparable<?>> estimator = new Estimator(theUser);
+    TopItems.Estimator<Comparable<?>> estimator = new Estimator(userID);
 
     List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItemIDs, rescorer, estimator);
 
@@ -206,15 +205,15 @@
 
   private final class Estimator implements TopItems.Estimator<Comparable<?>> {
 
-    private final User theUser;
+    private final Comparable<?> theUserID;
 
-    private Estimator(User theUser) {
-      this.theUser = theUser;
+    private Estimator(Comparable<?> theUserID) {
+      this.theUserID = theUserID;
     }
 
     @Override
     public double estimate(Comparable<?> itemID) throws TasteException {
-      return estimatePreference(theUser.getID(), itemID);
+      return estimatePreference(theUserID, itemID);
     }
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Tue Aug  4 00:06:46 2009
@@ -23,7 +23,7 @@
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -127,30 +127,32 @@
    * @param sumY2      sum of the square of the user/item preference values, over the second item/user
    * @param sumXYdiff2 sum of squares of differences in X and Y values
    * @return similarity value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no similarity can be computed
-   *         (e.g. when no items have been rated by both {@link User}s
+   *         (e.g. when no items have been rated by both users)
    */
   abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
 
   @Override
-  public double userSimilarity(User user1, User user2) throws TasteException {
+  public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
 
-    if (user1 == null || user2 == null) {
-      throw new IllegalArgumentException("user1 or user2 is null");
+    if (userID1 == null || userID2 == null) {
+      throw new IllegalArgumentException("userID1 or userID2 is null");
     }
 
-    Preference[] xPrefs = user1.getPreferencesAsArray();
-    Preference[] yPrefs = user2.getPreferencesAsArray();
+    PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1);
+    PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2);
+    int xLength = xPrefs.length();
+    int yLength = yPrefs.length();
 
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
+    if (xLength == 0 || yLength == 0) {
       return Double.NaN;
     }
 
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
+    Preference xPref = xPrefs.get(0);
+    Preference yPref = yPrefs.get(0);
     Comparable<?> xIndex = xPref.getItemID();
     Comparable<?> yIndex = yPref.getItemID();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
+    int xPrefIndex = 0;
+    int yPrefIndex = 0;
 
     double sumX = 0.0;
     double sumX2 = 0.0;
@@ -183,11 +185,11 @@
           if (compare < 0) {
             // X has a value; infer Y's
             x = hasPrefTransform ? prefTransform.getTransformedValue(xPref) : xPref.getValue();
-            y = inferrer.inferPreference(user2, xIndex);
+            y = inferrer.inferPreference(userID2, xIndex);
           } else {
             // compare > 0
             // Y has a value; infer X's
-            x = inferrer.inferPreference(user1, yIndex);
+            x = inferrer.inferPreference(userID1, yIndex);
             y = hasPrefTransform ? prefTransform.getTransformedValue(yPref) : yPref.getValue();
           }
         }
@@ -201,17 +203,17 @@
         count++;
       }
       if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
+        if (++xPrefIndex >= xLength) {
           break;
         }
-        xPref = xPrefs[xPrefIndex++];
+        xPref = xPrefs.get(xPrefIndex);
         xIndex = xPref.getItemID();
       }
       if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
+        if (++yPrefIndex >= yLength) {
           break;
         }
-        yPref = yPrefs[yPrefIndex++];
+        yPref = yPrefs.get(yPrefIndex);
         yIndex = yPref.getItemID();
       }
     }
@@ -230,7 +232,7 @@
     double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
 
     if (similarityTransform != null) {
-      result = similarityTransform.transformSimilarity(user1, user2, result);
+      result = similarityTransform.transformSimilarity(userID1, userID2, result);
     }
 
     if (!Double.isNaN(result)) {
@@ -246,17 +248,19 @@
       throw new IllegalArgumentException("item1 or item2 is null");
     }
 
-    Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(itemID1);
-    Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(itemID2);
+    PreferenceArray xPrefs = dataModel.getPreferencesForItem(itemID1);
+    PreferenceArray yPrefs = dataModel.getPreferencesForItem(itemID2);
+    int xLength = xPrefs.length();
+    int yLength = yPrefs.length();
 
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
+    if (xLength == 0 || yLength == 0) {
       return Double.NaN;
     }
 
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    User xIndex = xPref.getUser();
-    User yIndex = yPref.getUser();
+    Preference xPref = xPrefs.get(0);
+    Preference yPref = yPrefs.get(0);
+    Comparable<?> xIndex = xPref.getUserID();
+    Comparable<?> yIndex = yPref.getUserID();
     int xPrefIndex = 1;
     int yPrefIndex = 1;
 
@@ -271,7 +275,7 @@
     // No, pref inferrers and transforms don't appy here. I think.
 
     while (true) {
-      int compare = xIndex.compareTo(yIndex);
+      int compare = ((Comparable<Object>) xIndex).compareTo(yIndex);
       if (compare == 0) {
         // Both users expressed a preference for the item
         double x = xPref.getValue();
@@ -286,18 +290,18 @@
         count++;
       }
       if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
+        if (xPrefIndex == xLength) {
           break;
         }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getUser();
+        xPref = xPrefs.get(xPrefIndex++);
+        xIndex = xPref.getUserID();
       }
       if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
+        if (yPrefIndex == yLength) {
           break;
         }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getUser();
+        yPref = yPrefs.get(yPrefIndex++);
+        yIndex = yPref.getUserID();
       }
     }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java Tue Aug  4 00:06:46 2009
@@ -24,34 +24,36 @@
 import org.apache.mahout.cf.taste.impl.common.Retriever;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 
 import java.util.Collection;
 
 /**
- * <p>Implementations of this interface compute an inferred preference for a {@link User} and an item that the
+ * <p>Implementations of this interface compute an inferred preference for a user and an item that the
  * user has not expressed any preference for. This might be an average of other preference scores from that user, for
  * example. This technique is sometimes called "default voting".</p>
  */
 public final class AveragingPreferenceInferrer implements PreferenceInferrer {
 
-  private static final Retriever<User, Double> RETRIEVER = new PrefRetriever();
+  private static final Float ZERO = 0.0f;
 
-  private final Cache<User, Double> averagePreferenceValue;
+  private final DataModel dataModel;
+  private final Cache<Comparable<?>, Float> averagePreferenceValue;
 
   public AveragingPreferenceInferrer(DataModel dataModel) throws TasteException {
-    averagePreferenceValue = new Cache<User, Double>(RETRIEVER, dataModel.getNumUsers());
+    this.dataModel = dataModel;
+    Retriever<Comparable<?>, Float> retriever = new PrefRetriever();
+    averagePreferenceValue = new Cache<Comparable<?>, Float>(retriever, dataModel.getNumUsers());
     refresh(null);
   }
 
   @Override
-  public double inferPreference(User user, Comparable<?> itemID) throws TasteException {
-    if (user == null || itemID == null) {
-      throw new IllegalArgumentException("user or item is null");
+  public float inferPreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or item is null");
     }
-    return averagePreferenceValue.get(user);
+    return averagePreferenceValue.get(userID);
   }
 
   @Override
@@ -59,20 +61,20 @@
     averagePreferenceValue.clear();
   }
 
-  private static final class PrefRetriever implements Retriever<User, Double> {
-    private static final Double ZERO = 0.0;
+  private final class PrefRetriever implements Retriever<Comparable<?>, Float> {
 
     @Override
-    public Double get(User key) {
+    public Float get(Comparable<?> key) throws TasteException {
       RunningAverage average = new FullRunningAverage();
-      Preference[] prefs = key.getPreferencesAsArray();
-      if (prefs.length == 0) {
+      PreferenceArray prefs = dataModel.getPreferencesFromUser(key);
+      int size = prefs.length();
+      if (size == 0) {
         return ZERO;
       }
-      for (Preference pref : prefs) {
-        average.addDatum(pref.getValue());
+      for (int i = 0; i < size; i++) {
+        average.addDatum(prefs.getValue(i));
       }
-      return average.getAverage();
+      return (float) average.getAverage();
     }
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java Tue Aug  4 00:06:46 2009
@@ -24,7 +24,6 @@
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.impl.common.Retriever;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 
@@ -34,7 +33,7 @@
 public final class CachingUserSimilarity implements UserSimilarity {
 
   private final UserSimilarity similarity;
-  private final Cache<Pair<User, User>, Double> similarityCache;
+  private final Cache<Pair<Comparable<?>, Comparable<?>>, Double> similarityCache;
 
   public CachingUserSimilarity(UserSimilarity similarity, DataModel dataModel) throws TasteException {
     if (similarity == null) {
@@ -42,14 +41,14 @@
     }
     this.similarity = similarity;
     int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing    
-    this.similarityCache = new Cache<Pair<User, User>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
+    this.similarityCache = new Cache<Pair<Comparable<?>, Comparable<?>>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
   }
 
   @Override
-  public double userSimilarity(User user1, User user2) throws TasteException {
-    Pair<User, User> key = user1.compareTo(user2) < 0 ?
-        new Pair<User, User>(user1, user2) :
-        new Pair<User, User>(user2, user1);
+  public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
+    Pair<Comparable<?>, Comparable<?>> key = ((Comparable<Object>) userID1).compareTo(userID2) < 0 ?
+        new Pair<Comparable<?>, Comparable<?>>(userID1, userID2) :
+        new Pair<Comparable<?>, Comparable<?>>(userID2, userID1);
     return similarityCache.get(key);
   }
 
@@ -66,7 +65,7 @@
     RefreshHelper.maybeRefresh(alreadyRefreshed, similarity);
   }
 
-  private static final class SimilarityRetriever implements Retriever<Pair<User, User>, Double> {
+  private static final class SimilarityRetriever implements Retriever<Pair<Comparable<?>, Comparable<?>>, Double> {
     private final UserSimilarity similarity;
 
     private SimilarityRetriever(UserSimilarity similarity) {
@@ -74,7 +73,7 @@
     }
 
     @Override
-    public Double get(Pair<User, User> key) throws TasteException {
+    public Double get(Pair<Comparable<?>, Comparable<?>> key) throws TasteException {
       return similarity.userSimilarity(key.getFirst(), key.getSecond());
     }
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Tue Aug  4 00:06:46 2009
@@ -22,8 +22,8 @@
 import org.apache.mahout.cf.taste.model.DataModel;
 
 /**
- * <p>An implementation of a "similarity" based on the Euclidean "distance" between two {@link
- * org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions and preferences as points along
+ * <p>An implementation of a "similarity" based on the Euclidean "distance" between two 
+ * users X and Y. Thinking of items as dimensions and preferences as points along
  * those dimensions, a distance is computed using all items (dimensions) where both users have expressed a preference
  * for that item. This is simply the square root of the sum of the squares of differences in position (preference) along
  * each dimension. The similarity is then computed as 1 / (1 + distance), so the resulting values are in the range

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Tue Aug  4 00:06:46 2009
@@ -25,7 +25,6 @@
 import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 
@@ -37,7 +36,8 @@
 
 public final class GenericUserSimilarity implements UserSimilarity {
 
-  private final Map<User, Map<User, Double>> similarityMaps = new FastMap<User, Map<User, Double>>();
+  private final Map<Comparable<?>, Map<Comparable<?>, Double>> similarityMaps =
+          new FastMap<Comparable<?>, Map<Comparable<?>, Double>>();
 
   public GenericUserSimilarity(Iterable<UserUserSimilarity> similarities) {
     initSimilarityMaps(similarities);
@@ -49,15 +49,15 @@
   }
 
   public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
-    List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
-    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+    List<? extends Comparable<?>> userIDs = IteratorUtils.iterableToList(dataModel.getUserIDs());
+    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
     initSimilarityMaps(new IteratorIterable<UserUserSimilarity>(it));
   }
 
   public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
       throws TasteException {
-    List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
-    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+    List<? extends Comparable<?>> userIDs = IteratorUtils.iterableToList(dataModel.getUserIDs());
+    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
     Iterable<UserUserSimilarity> keptSimilarities =
         TopItems.getTopUserUserSimilarities(maxToKeep, new IteratorIterable<UserUserSimilarity>(it));
     initSimilarityMaps(keptSimilarities);
@@ -65,13 +65,13 @@
 
   private void initSimilarityMaps(Iterable<UserUserSimilarity> similarities) {
     for (UserUserSimilarity uuc : similarities) {
-      User similarityUser1 = uuc.getUser1();
-      User similarityUser2 = uuc.getUser2();
-      int compare = similarityUser1.compareTo(similarityUser2);
+      Comparable<?> similarityUser1 = uuc.getUserID1();
+      Comparable<?> similarityUser2 = uuc.getUserID2();
+      int compare = ((Comparable<Object>) similarityUser1).compareTo(similarityUser2);
       if (compare != 0) {
         // Order them -- first key should be the "smaller" one
-        User user1;
-        User user2;
+        Comparable<?> user1;
+        Comparable<?> user2;
         if (compare < 0) {
           user1 = similarityUser1;
           user2 = similarityUser2;
@@ -79,9 +79,9 @@
           user1 = similarityUser2;
           user2 = similarityUser1;
         }
-        Map<User, Double> map = similarityMaps.get(user1);
+        Map<Comparable<?>, Double> map = similarityMaps.get(user1);
         if (map == null) {
-          map = new FastMap<User, Double>();
+          map = new FastMap<Comparable<?>, Double>();
           similarityMaps.put(user1, map);
         }
         map.put(user2, uuc.getValue());
@@ -91,21 +91,21 @@
   }
 
   @Override
-  public double userSimilarity(User user1, User user2) {
-    int compare = user1.compareTo(user2);
+  public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) {
+    int compare = ((Comparable<Object>) userID1).compareTo(userID2);
     if (compare == 0) {
       return 1.0;
     }
-    User first;
-    User second;
+    Comparable<?> first;
+    Comparable<?> second;
     if (compare < 0) {
-      first = user1;
-      second = user2;
+      first = userID1;
+      second = userID2;
     } else {
-      first = user2;
-      second = user1;
+      first = userID2;
+      second = userID1;
     }
-    Map<User, Double> nextMap = similarityMaps.get(first);
+    Map<Comparable<?>, Double> nextMap = similarityMaps.get(first);
     if (nextMap == null) {
       return Double.NaN;
     }
@@ -125,28 +125,28 @@
 
   public static final class UserUserSimilarity implements Comparable<UserUserSimilarity> {
 
-    private final User user1;
-    private final User user2;
+    private final Comparable<?> userID1;
+    private final Comparable<?> userID2;
     private final double value;
 
-    public UserUserSimilarity(User user1, User user2, double value) {
-      if (user1 == null || user2 == null) {
+    public UserUserSimilarity(Comparable<?> userID1, Comparable<?> userID2, double value) {
+      if (userID1 == null || userID2 == null) {
         throw new IllegalArgumentException("A user is null");
       }
       if (Double.isNaN(value) || value < -1.0 || value > 1.0) {
         throw new IllegalArgumentException("Illegal value: " + value);
       }
-      this.user1 = user1;
-      this.user2 = user2;
+      this.userID1 = userID1;
+      this.userID2 = userID2;
       this.value = value;
     }
 
-    public User getUser1() {
-      return user1;
+    public Comparable<?> getUserID1() {
+      return userID1;
     }
 
-    public User getUser2() {
-      return user2;
+    public Comparable<?> getUserID2() {
+      return userID2;
     }
 
     public double getValue() {
@@ -155,7 +155,7 @@
 
     @Override
     public String toString() {
-      return "UserUserSimilarity[" + user1 + ',' + user2 + ':' + value + ']';
+      return "UserUserSimilarity[" + userID1 + ',' + userID2 + ':' + value + ']';
     }
 
     /** Defines an ordering from highest similarity to lowest. */
@@ -171,12 +171,12 @@
         return false;
       }
       UserUserSimilarity otherSimilarity = (UserUserSimilarity) other;
-      return otherSimilarity.user1.equals(user1) && otherSimilarity.user2.equals(user2) && otherSimilarity.value == value;
+      return otherSimilarity.userID1.equals(userID1) && otherSimilarity.userID2.equals(userID2) && otherSimilarity.value == value;
     }
 
     @Override
     public int hashCode() {
-      return user1.hashCode() ^ user2.hashCode() ^ RandomUtils.hashDouble(value);
+      return userID1.hashCode() ^ userID2.hashCode() ^ RandomUtils.hashDouble(value);
     }
 
   }
@@ -184,18 +184,18 @@
   private static final class DataModelSimilaritiesIterator implements Iterator<UserUserSimilarity> {
 
     private final UserSimilarity otherSimilarity;
-    private final List<? extends User> users;
+    private final List<? extends Comparable<?>> userIDs;
     private final int size;
     private int i;
-    private User user1;
+    private Comparable<?> userID1;
     private int j;
 
-    private DataModelSimilaritiesIterator(UserSimilarity otherSimilarity, List<? extends User> users) {
+    private DataModelSimilaritiesIterator(UserSimilarity otherSimilarity, List<? extends Comparable<?>> userIDs) {
       this.otherSimilarity = otherSimilarity;
-      this.users = users;
-      this.size = users.size();
+      this.userIDs = userIDs;
+      this.size = userIDs.size();
       i = 0;
-      user1 = users.get(0);
+      userID1 = userIDs.get(0);
       j = 1;
     }
 
@@ -209,19 +209,19 @@
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
-      User user2 = users.get(j);
+      Comparable<?> userID2 = userIDs.get(j);
       double similarity;
       try {
-        similarity = otherSimilarity.userSimilarity(user1, user2);
+        similarity = otherSimilarity.userSimilarity(userID1, userID2);
       } catch (TasteException te) {
         // ugly:
         throw new RuntimeException(te);
       }
-      UserUserSimilarity result = new UserUserSimilarity(user1, user2, similarity);
+      UserUserSimilarity result = new UserUserSimilarity(userID1, userID2, similarity);
       j++;
       if (j == size) {
         i++;
-        user1 = users.get(i);
+        userID1 = userIDs.get(i);
         j = i + 1;
       }
       return result;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java Tue Aug  4 00:06:46 2009
@@ -19,10 +19,9 @@
 
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
@@ -47,57 +46,24 @@
   }
 
   @Override
-  public double userSimilarity(User user1, User user2) throws TasteException {
-    if (user1 == null || user2 == null) {
-      throw new IllegalArgumentException("user1 or user2 is null");
-    }
-
-    Preference[] xPrefs = user1.getPreferencesAsArray();
-    Preference[] yPrefs = user2.getPreferencesAsArray();
-
-    if (xPrefs.length == 0 && yPrefs.length == 0) {
-      return Double.NaN;
-    }
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return 0.0;
-    }
+  public double userSimilarity(Comparable<?> userID1, Comparable<?> userID2) throws TasteException {
 
-    int intersectionSize = findIntersectionSize(xPrefs, yPrefs);
+    FastSet<Comparable<?>> prefs1 = dataModel.getItemIDsFromUser(userID1);
+    FastSet<Comparable<?>> prefs2 = dataModel.getItemIDsFromUser(userID2);
 
+    int prefs1Size = prefs1.size();
+    int prefs2Size = prefs2.size();
+    int intersectionSize = prefs1Size < prefs2Size ?
+        prefs2.intersectionSize(prefs1) :
+        prefs1.intersectionSize(prefs2);
     int numItems = dataModel.getNumItems();
-    double logLikelihood =
-        twoLogLambda(intersectionSize, xPrefs.length - intersectionSize, yPrefs.length, numItems - yPrefs.length);
+    double logLikelihood = LogLikelihoodSimilarity.twoLogLambda(intersectionSize,
+                                                                prefs1Size - intersectionSize,
+                                                                prefs2Size,
+                                                                numItems - prefs2Size);
     return 1.0 - 1.0 / (1.0 + logLikelihood);
   }
 
-  static int findIntersectionSize(Preference[] xPrefs, Preference[] yPrefs) {
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    int intersectionSize = 0;
-    while (true) {
-      int compare = ((Comparable<Object>) xPref.getItemID()).compareTo(yPref.getItemID());
-      if (compare == 0) {
-        intersectionSize++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-      }
-    }
-    return intersectionSize;
-  }
-
   @Override
   public double itemSimilarity(Comparable<?> itemID1, Comparable<?> itemID2) throws TasteException {
     if (itemID1 == null || itemID2 == null) {
@@ -122,7 +88,7 @@
   }
 
   private static double safeLog(double d) {
-    return d <= 0.0 ? 0 : Math.log(d);
+    return d <= 0.0 ? 0.0 : Math.log(d);
   }
 
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Tue Aug  4 00:06:46 2009
@@ -20,10 +20,9 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.common.Weighting;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
 
 /**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values are calculated:</p>
+ * <p>An implementation of the Pearson correlation. For users X and Y, the following values are calculated:</p>
  *
  * <ul> <li>sumX2: sum of the square of all X's preference values</li> <li>sumY2: sum of the square of all Y's
  * preference values</li> <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and