You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/20 18:35:52 UTC

svn commit: r795905 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: eval/GenericRecommenderIRStatsEvaluator.java model/GenericBooleanUserDataModel.java

Author: srowen
Date: Mon Jul 20 16:35:52 2009
New Revision: 795905

URL: http://svn.apache.org/viewvc?rev=795905&view=rev
Log:
Add early version of GenericBooleanUserDataModel

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanUserDataModel.java
      - copied, changed from r795833, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=795905&r1=795904&r2=795905&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Mon Jul 20 16:35:52 2009
@@ -32,6 +32,7 @@
 import org.apache.mahout.cf.taste.impl.model.ByValuePreferenceComparator;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanUserDataModel;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.Preference;
@@ -126,7 +127,13 @@
           for (User user2 : dataModel.getUsers()) {
             processOtherUser(id, relevantItems, trainingUsers, user2);
           }
-          DataModel trainingModel = new GenericDataModel(trainingUsers);
+
+          DataModel trainingModel;
+          if (trainingUsers.get(0) instanceof BooleanPrefUser) {
+            trainingModel = new GenericBooleanUserDataModel(trainingUsers);
+          } else {
+            trainingModel = new GenericDataModel(trainingUsers);
+          }
           Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
 
           try {

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanUserDataModel.java (from r795833, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanUserDataModel.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanUserDataModel.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java&r1=795833&r2=795905&rev=795905&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanUserDataModel.java Mon Jul 20 16:35:52 2009
@@ -21,16 +21,11 @@
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.ArrayIterator;
-import org.apache.mahout.cf.taste.impl.common.EmptyIterable;
 import org.apache.mahout.cf.taste.impl.common.FastMap;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.Serializable;
 import java.util.ArrayList;
@@ -39,91 +34,42 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 /**
- * <p>A simple {@link DataModel} which uses a given {@link List} of {@link User}s as its data source. This
- * implementation is mostly useful for small experiments and is not recommended for contexts where performance is
- * important.</p>
+ * TODO: many methods here are not implemented yet and throw UnsupportedOperationException
  */
-public final class GenericDataModel implements DataModel, Serializable {
-
-  private static final Logger log = LoggerFactory.getLogger(GenericDataModel.class);
-
-  private static final Preference[] NO_PREFS_ARRAY = new Preference[0];
-  private static final Iterable<Preference> NO_PREFS_ITERABLE = new EmptyIterable<Preference>();
+public final class GenericBooleanUserDataModel implements DataModel, Serializable {
 
   private final List<User> users;
   private final Map<Object, User> userMap;
-  private final List<Item> items;
-  private final Map<Object, Item> itemMap;
-  private final Map<Object, Preference[]> preferenceForItems;
-
-  /**
-   * <p>Creates a new {@link GenericDataModel} from the given {@link User}s (and their preferences). This {@link
-   * DataModel} retains all this information in memory and is effectively immutable.</p>
-   *
-   * @param users {@link User}s to include in this {@link GenericDataModel}
-   */
-  @SuppressWarnings("unchecked")
-  public GenericDataModel(Iterable<? extends User> users) {
+  private final Object[] itemIDs;
+  //private final Set<Object> itemIDSet;
+
+  public GenericBooleanUserDataModel(Iterable<? extends User> users) {
     if (users == null) {
       throw new IllegalArgumentException("users is null");
     }
-
+    this.users = new ArrayList<User>();
     this.userMap = new FastMap<Object, User>();
-    this.itemMap = new FastMap<Object, Item>();
-    // I'm abusing generics a little here since I want to use this (huge) map to hold Lists,
-    // then arrays, and don't want to allocate two Maps at once here.
-    Map<Object, Object> prefsForItems = new FastMap<Object, Object>();
-    int currentCount = 0;
+    List<Object> itemIDs = new ArrayList<Object>();
     for (User user : users) {
-      userMap.put(user.getID(), user);
-      Preference[] prefsArray = user.getPreferencesAsArray();
-      for (Preference preference : prefsArray) {
-        Item item = preference.getItem();
-        Object itemID = item.getID();
-        itemMap.put(itemID, item);
-        List<Preference> prefsForItem = (List<Preference>) prefsForItems.get(itemID);
-        if (prefsForItem == null) {
-          prefsForItem = new ArrayList<Preference>();
-          prefsForItems.put(itemID, prefsForItem);
-        }
-        prefsForItem.add(preference);
-      }
-      currentCount++;
-      if (currentCount % 10000 == 0) {
-        log.info("Processed {} users", currentCount);
+      if (!(user instanceof BooleanPrefUser)) {
+        throw new IllegalArgumentException("Must use a source of BooleanPrefUsers");
       }
+      this.users.add(user);
+      userMap.put(user.getID(), user);
+      itemIDs.addAll(((BooleanPrefUser) user).getItemIDs());
     }
+    Collections.sort(this.users);
 
-    List<User> usersCopy = new ArrayList<User>(userMap.values());
-    Collections.sort(usersCopy);
-    this.users = Collections.unmodifiableList(usersCopy);
-
-    List<Item> itemsCopy = new ArrayList<Item>(itemMap.values());
-    Collections.sort(itemsCopy);
-    this.items = Collections.unmodifiableList(itemsCopy);
-
-    // Swap out lists for arrays here -- using the same Map. This is why the generics mess is worth it.
-    for (Map.Entry<Object, Object> entry : prefsForItems.entrySet()) {
-      List<Preference> list = (List<Preference>) entry.getValue();
-      Preference[] prefsAsArray = list.toArray(new Preference[list.size()]);
-      Arrays.sort(prefsAsArray, ByUserPreferenceComparator.getInstance());
-      entry.setValue(prefsAsArray);
-    }
-    // Yeah more generics ugliness
-    this.preferenceForItems = (Map<Object, Preference[]>) (Map<Object, ?>) prefsForItems;
+    this.itemIDs = itemIDs.toArray();
+    itemIDs = null;
+    Arrays.sort(this.itemIDs);
+    //this.itemIDSet = new FastSet<Object>(this.itemIDs.length);
+    //itemIDSet.addAll(Arrays.asList(this.itemIDs));
   }
 
-  /**
-   * <p>Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given {@link
-   * DataModel}.</p>
-   *
-   * @param dataModel {@link DataModel} to copy
-   * @throws TasteException if an error occurs while retrieving the other {@link DataModel}'s users
-   */
-  public GenericDataModel(DataModel dataModel) throws TasteException {
+  public GenericBooleanUserDataModel(DataModel dataModel) throws TasteException {
     this(dataModel.getUsers());
   }
 
@@ -132,7 +78,6 @@
     return users;
   }
 
-  /** @throws NoSuchUserException if there is no such {@link User} */
   @Override
   public User getUser(Object id) throws NoSuchUserException {
     User user = userMap.get(id);
@@ -144,34 +89,27 @@
 
   @Override
   public Iterable<? extends Item> getItems() {
-    return items;
+    throw new UnsupportedOperationException();
   }
 
-  /** @throws NoSuchItemException if there is no such {@link Item} */
   @Override
   public Item getItem(Object id) throws NoSuchItemException {
-    Item item = itemMap.get(id);
-    if (item == null) {
-      throw new NoSuchItemException();
-    }
-    return item;
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public Iterable<? extends Preference> getPreferencesForItem(Object itemID) {
-    Preference[] prefs = preferenceForItems.get(itemID);
-    return prefs == null ? NO_PREFS_ITERABLE : new ArrayIterator<Preference>(prefs);
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public Preference[] getPreferencesForItemAsArray(Object itemID) {
-    Preference[] prefs = preferenceForItems.get(itemID);
-    return prefs == null ? NO_PREFS_ARRAY : prefs;
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public int getNumItems() {
-    return items.size();
+    return itemIDs.length;
   }
 
   @Override
@@ -188,37 +126,18 @@
     if (length == 0 || length > 2) {
       throw new IllegalArgumentException("Illegal number of item IDs: " + length);
     }
-    if (length == 1) {
-      Preference[] prefs = preferenceForItems.get(itemIDs[0]);
-      return prefs == null ? 0 : prefs.length;
-    } else {
-      Preference[] prefs1 = preferenceForItems.get(itemIDs[0]);
-      Preference[] prefs2 = preferenceForItems.get(itemIDs[1]);
-      if (prefs1 == null || prefs2 == null) {
-        return 0;
-      }
-      Set<Object> users1 = new FastSet<Object>(prefs1.length);
-      for (Preference aPrefs1 : prefs1) {
-        users1.add(aPrefs1.getUser().getID());
-      }
-      Set<Object> users2 = new FastSet<Object>(prefs2.length);
-      for (Preference aPrefs2 : prefs2) {
-        users2.add(aPrefs2.getUser().getID());
-      }
-      users1.retainAll(users2);
-      return users1.size();
-    }
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public void setPreference(Object userID, Object itemID, double value)
       throws NoSuchUserException, NoSuchItemException {
-    getUser(userID).setPreference(getItem(itemID), value);
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public void removePreference(Object userID, Object itemID) throws NoSuchUserException {
-    getUser(userID).removePreference(itemID);
+    throw new UnsupportedOperationException();
   }
 
   @Override
@@ -228,7 +147,7 @@
 
   @Override
   public String toString() {
-    return "GenericDataModel[users:" + (users.size() > 3 ? users.subList(0, 3) + "..." : users) + ']';
+    return "GenericBooleanUserDataModel[users:" + (users.size() > 3 ? users.subList(0, 3) + "..." : users) + ']';
   }
 
-}
+}
\ No newline at end of file