You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/09 23:35:17 UTC
svn commit: r654943 [5/9] - in /lucene/mahout/trunk/core: ./ lib/
src/main/examples/org/ src/main/examples/org/apache/
src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/
src/main/examples/org/apache/mahout/cf/taste/ src/main/e...
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Fri May 9 14:35:12 2008
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Defines cluster similarity as the <em>smallest</em> correlation between any two
+ * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
+ * when <em>all pairs</em> of their members have relatively high correlation.</p>
+ */
+public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
+
+ private final UserCorrelation correlation;
+ private final double samplingPercentage;
+
+ /**
+ * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+ * All user-user correlations are examined.</p>
+ *
+ * @param correlation
+ */
+ public FarthestNeighborClusterSimilarity(UserCorrelation correlation) {
+ this(correlation, 1.0);
+ }
+
+ /**
+ * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+ * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
+ * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+ * of accuracy.</p>
+ *
+ * @param correlation
+ * @param samplingPercentage
+ */
+ public FarthestNeighborClusterSimilarity(UserCorrelation correlation, double samplingPercentage) {
+ if (correlation == null) {
+ throw new IllegalArgumentException("correlation is null");
+ }
+ if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+ throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+ }
+ this.correlation = correlation;
+ this.samplingPercentage = samplingPercentage;
+ }
+
+ public double getSimilarity(Collection<User> cluster1,
+ Collection<User> cluster2) throws TasteException {
+ if (cluster1.isEmpty() || cluster2.isEmpty()) {
+ return Double.NaN;
+ }
+ double leastCorrelation = Double.POSITIVE_INFINITY;
+ for (User user1 : cluster1) {
+ if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+ for (User user2 : cluster2) {
+ double theCorrelation = correlation.userCorrelation(user1, user2);
+ if (theCorrelation < leastCorrelation) {
+ leastCorrelation = theCorrelation;
+ }
+ }
+ }
+ }
+ // We skipped everything? well, at least try comparing the first Users to get some value
+ if (leastCorrelation == Double.POSITIVE_INFINITY) {
+ return correlation.userCorrelation(cluster1.iterator().next(), cluster2.iterator().next());
+ }
+ return leastCorrelation;
+ }
+
+ public void refresh() {
+ correlation.refresh();
+ }
+
+ @Override
+ public String toString() {
+ return "FarthestNeighborClusterSimilarity[correlation:" + correlation + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,333 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple {@link org.apache.mahout.cf.taste.recommender.Recommender} which uses a given
+ * {@link org.apache.mahout.cf.taste.model.DataModel} and {@link org.apache.mahout.cf.taste.correlation.ItemCorrelation}
+ * to produce recommendations. This class represents Taste's support for item-based recommenders.</p>
+ *
+ * <p>The {@link ItemCorrelation} is the most important point to discuss here. Item-based recommenders
+ * are useful because they can take advantage of something to be very fast: they base their computations
+ * on item correlation, not user correlation, and item correlation is relatively static. It can be
+ * precomputed, instead of re-computed in real time.</p>
+ *
+ * <p>Thus it's strongly recommended that you use {@link org.apache.mahout.cf.taste.impl.correlation.GenericItemCorrelation}
+ * with pre-computed correlations if you're going to use this class. You can use
+ * {@link org.apache.mahout.cf.taste.impl.correlation.PearsonCorrelation} too, which computes correlations in real-time,
+ * but will probably find this painfully slow for large amounts of data.</p>
+ */
+public final class GenericItemBasedRecommender extends AbstractRecommender implements ItemBasedRecommender {
+
+ private static final Logger log = Logger.getLogger(GenericItemBasedRecommender.class.getName());
+
+ private final ItemCorrelation correlation;
+ private final ReentrantLock refreshLock;
+
+ public GenericItemBasedRecommender(DataModel dataModel, ItemCorrelation correlation) {
+ super(dataModel);
+ if (correlation == null) {
+ throw new IllegalArgumentException("correlation is null");
+ }
+ this.correlation = correlation;
+ this.refreshLock = new ReentrantLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommending items for user ID '" + userID + '\'');
+ }
+
+ User theUser = getDataModel().getUser(userID);
+ if (getNumPreferences(theUser) == 0) {
+ return Collections.emptyList();
+ }
+
+ Set<Item> allItems = getAllOtherItems(theUser);
+
+ TopItems.Estimator<Item> estimator = new Estimator(theUser);
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommendations are: " + topItems);
+ }
+ return topItems;
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ DataModel model = getDataModel();
+ User theUser = model.getUser(userID);
+ Preference actualPref = theUser.getPreferenceFor(itemID);
+ if (actualPref != null) {
+ return actualPref.getValue();
+ }
+ Item item = model.getItem(itemID);
+ return doEstimatePreference(theUser, item);
+ }
+
+ public List<RecommendedItem> mostSimilarItems(Object itemID, int howMany) throws TasteException {
+ return mostSimilarItems(itemID, howMany, NullRescorer.getItemItemPairInstance());
+ }
+
+ public List<RecommendedItem> mostSimilarItems(Object itemID,
+ int howMany,
+ Rescorer<Pair<Item, Item>> rescorer) throws TasteException {
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ Item toItem = getDataModel().getItem(itemID);
+ TopItems.Estimator<Item> estimator = new MostSimilarEstimator(toItem, correlation, rescorer);
+ return doMostSimilarItems(itemID, howMany, estimator);
+ }
+
+ public List<RecommendedItem> mostSimilarItems(List<Object> itemIDs, int howMany) throws TasteException {
+ return mostSimilarItems(itemIDs, howMany, NullRescorer.getItemItemPairInstance());
+ }
+
+ public List<RecommendedItem> mostSimilarItems(List<Object> itemIDs,
+ int howMany,
+ Rescorer<Pair<Item, Item>> rescorer) throws TasteException {
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ DataModel model = getDataModel();
+ List<Item> toItems = new ArrayList<Item>(itemIDs.size());
+ for (Object itemID : itemIDs) {
+ toItems.add(model.getItem(itemID));
+ }
+ TopItems.Estimator<Item> estimator = new MultiMostSimilarEstimator(toItems, correlation, rescorer);
+ Collection<Item> allItems = new HashSet<Item>(model.getNumItems());
+ for (Item item : model.getItems()) {
+ allItems.add(item);
+ }
+ for (Item item : toItems) {
+ allItems.remove(item);
+ }
+ return TopItems.getTopItems(howMany, allItems, NullRescorer.getItemInstance(), estimator);
+ }
+
+ public List<RecommendedItem> recommendedBecause(Object userID,
+ Object itemID,
+ int howMany) throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ if (itemID == null) {
+ throw new IllegalArgumentException("itemID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+
+ DataModel model = getDataModel();
+ User user = model.getUser(userID);
+ Item recommendedItem = model.getItem(itemID);
+ TopItems.Estimator<Item> estimator = new RecommendedBecauseEstimator(user, recommendedItem, correlation);
+
+ Collection<Item> allUserItems = new HashSet<Item>();
+ Preference[] prefs = user.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ allUserItems.add(prefs[i].getItem());
+ }
+ allUserItems.remove(recommendedItem);
+
+ return TopItems.getTopItems(howMany, allUserItems, NullRescorer.getItemInstance(), estimator);
+ }
+
+ private List<RecommendedItem> doMostSimilarItems(Object itemID,
+ int howMany,
+ TopItems.Estimator<Item> estimator) throws TasteException {
+ DataModel model = getDataModel();
+ Item toItem = model.getItem(itemID);
+ Collection<Item> allItems = new HashSet<Item>(model.getNumItems());
+ for (Item item : model.getItems()) {
+ allItems.add(item);
+ }
+ allItems.remove(toItem);
+ return TopItems.getTopItems(howMany, allItems, NullRescorer.getItemInstance(), estimator);
+ }
+
+ private double doEstimatePreference(User theUser, Item item) throws TasteException {
+ double preference = 0.0;
+ double totalCorrelation = 0.0;
+ Preference[] prefs = theUser.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ Preference pref = prefs[i];
+ double theCorrelation = correlation.itemCorrelation(item, pref.getItem());
+ if (!Double.isNaN(theCorrelation)) {
+ // Why + 1.0? correlation ranges from -1.0 to 1.0, and we want to use it as a simple
+ // weight. To avoid negative values, we add 1.0 to put it in
+ // the [0.0,2.0] range which is reasonable for weights
+ theCorrelation += 1.0;
+ preference += theCorrelation * pref.getValue();
+ totalCorrelation += theCorrelation;
+ }
+ }
+ return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+ }
+
+ private static int getNumPreferences(User theUser) {
+ return theUser.getPreferencesAsArray().length;
+ }
+
+ @Override
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ super.refresh();
+ correlation.refresh();
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "GenericItemBasedRecommender[correlation:" + correlation + ']';
+ }
+
+ private static class MostSimilarEstimator implements TopItems.Estimator<Item> {
+
+ private final Item toItem;
+ private final ItemCorrelation correlation;
+ private final Rescorer<Pair<Item, Item>> rescorer;
+
+ private MostSimilarEstimator(Item toItem,
+ ItemCorrelation correlation,
+ Rescorer<Pair<Item, Item>> rescorer) {
+ this.toItem = toItem;
+ this.correlation = correlation;
+ this.rescorer = rescorer;
+ }
+
+ public double estimate(Item item) throws TasteException {
+ Pair<Item, Item> pair = new Pair<Item, Item>(toItem, item);
+ if (rescorer.isFiltered(pair)) {
+ return Double.NaN;
+ }
+ double originalEstimate = correlation.itemCorrelation(toItem, item);
+ return rescorer.rescore(pair, originalEstimate);
+ }
+ }
+
+ private final class Estimator implements TopItems.Estimator<Item> {
+
+ private final User theUser;
+
+ private Estimator(User theUser) {
+ this.theUser = theUser;
+ }
+
+ public double estimate(Item item) throws TasteException {
+ return doEstimatePreference(theUser, item);
+ }
+ }
+
+ private static class MultiMostSimilarEstimator implements TopItems.Estimator<Item> {
+
+ private final List<Item> toItems;
+ private final ItemCorrelation correlation;
+ private final Rescorer<Pair<Item, Item>> rescorer;
+
+ private MultiMostSimilarEstimator(List<Item> toItems,
+ ItemCorrelation correlation,
+ Rescorer<Pair<Item, Item>> rescorer) {
+ this.toItems = toItems;
+ this.correlation = correlation;
+ this.rescorer = rescorer;
+ }
+
+ public double estimate(Item item) throws TasteException {
+ RunningAverage average = new FullRunningAverage();
+ for (Item toItem : toItems) {
+ Pair<Item, Item> pair = new Pair<Item, Item>(toItem, item);
+ if (rescorer.isFiltered(pair)) {
+ continue;
+ }
+ double estimate = correlation.itemCorrelation(toItem, item);
+ estimate = rescorer.rescore(pair, estimate);
+ average.addDatum(estimate);
+ }
+ return average.getAverage();
+ }
+ }
+
+ private static class RecommendedBecauseEstimator implements TopItems.Estimator<Item> {
+
+ private final User user;
+ private final Item recommendedItem;
+ private final ItemCorrelation correlation;
+
+ private RecommendedBecauseEstimator(User user,
+ Item recommendedItem,
+ ItemCorrelation correlation) {
+ this.user = user;
+ this.recommendedItem = recommendedItem;
+ this.correlation = correlation;
+ }
+
+ public double estimate(Item item) throws TasteException {
+ Preference pref = user.getPreferenceFor(item.getID());
+ if (pref == null) {
+ return Double.NaN;
+ }
+ double correlationValue = correlation.itemCorrelation(recommendedItem, item);
+ return (1.0 + correlationValue) * pref.getValue();
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java Fri May 9 14:35:12 2008
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.io.Serializable;
+
+/**
+ * <p>A simple implementation of {@link RecommendedItem}.</p>
+ */
+public final class GenericRecommendedItem implements RecommendedItem, Serializable {
+
+ private final Item item;
+ private final double value;
+
+ /**
+ * @param item
+ * @param value
+ * @throws IllegalArgumentException if item is null or value is NaN
+ */
+ public GenericRecommendedItem(Item item, double value) {
+ if (item == null) {
+ throw new IllegalArgumentException("item is null");
+ }
+ if (Double.isNaN(value)) {
+ throw new IllegalArgumentException("value is NaN");
+ }
+ this.item = item;
+ this.value = value;
+ }
+
+ public Item getItem() {
+ return item;
+ }
+
+ public double getValue() {
+ return value;
+ }
+
+ @Override
+ public String toString() {
+ return "RecommendedItem[item:" + item + ", value:" + value + ']';
+ }
+
+ @Override
+ public int hashCode() {
+ return item.hashCode() ^ Double.valueOf(value).hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof GenericRecommendedItem)) {
+ return false;
+ }
+ GenericRecommendedItem other = (GenericRecommendedItem) o;
+ return item.equals(other.item) && value == other.value;
+ }
+
+ /**
+ * Defines a natural ordering from most-preferred item (highest value) to least-preferred.
+ *
+ * @param other
+ * @return 1, -1, 0 as this value is less than, greater than or equal to the other's value
+ */
+ public int compareTo(RecommendedItem other) {
+ double otherValue = other.getValue();
+ if (value < otherValue) {
+ return 1;
+ } else if (value > otherValue) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple {@link Recommender} which uses a given {@link DataModel} and {@link UserNeighborhood}
+ * to produce recommendations.</p>
+ */
+public final class GenericUserBasedRecommender extends AbstractRecommender implements UserBasedRecommender {
+
+ private static final Logger log = Logger.getLogger(GenericUserBasedRecommender.class.getName());
+
+ private final UserNeighborhood neighborhood;
+ private final UserCorrelation correlation;
+ private final ReentrantLock refreshLock;
+
+ public GenericUserBasedRecommender(DataModel dataModel,
+ UserNeighborhood neighborhood,
+ UserCorrelation correlation) {
+ super(dataModel);
+ if (neighborhood == null) {
+ throw new IllegalArgumentException("neighborhood is null");
+ }
+ this.neighborhood = neighborhood;
+ this.correlation = correlation;
+ this.refreshLock = new ReentrantLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommending items for user ID '" + userID + '\'');
+ }
+
+ User theUser = getDataModel().getUser(userID);
+ Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+ if (log.isLoggable(Level.FINER)) {
+ log.finer("UserNeighborhood is: " + neighborhood);
+ }
+
+ if (theNeighborhood.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ Set<Item> allItems = getAllOtherItems(theNeighborhood, theUser);
+ if (log.isLoggable(Level.FINER)) {
+ log.finer("Items in neighborhood which user doesn't prefer already are: " + allItems);
+ }
+
+ TopItems.Estimator<Item> estimator = new Estimator(theUser, theNeighborhood);
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommendations are: " + topItems);
+ }
+ return topItems;
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ DataModel model = getDataModel();
+ User theUser = model.getUser(userID);
+ Preference actualPref = theUser.getPreferenceFor(itemID);
+ if (actualPref != null) {
+ return actualPref.getValue();
+ }
+ Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+ Item item = model.getItem(itemID);
+ return doEstimatePreference(theUser, theNeighborhood, item);
+ }
+
+ public List<User> mostSimilarUsers(Object userID, int howMany) throws TasteException {
+ return mostSimilarUsers(userID, howMany, NullRescorer.getUserUserPairInstance());
+ }
+
+ public List<User> mostSimilarUsers(Object userID,
+ int howMany,
+ Rescorer<Pair<User, User>> rescorer) throws TasteException {
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ User toUser = getDataModel().getUser(userID);
+ TopItems.Estimator<User> estimator = new MostSimilarEstimator(toUser, correlation, rescorer);
+ return doMostSimilarUsers(userID, howMany, estimator);
+ }
+
+ private List<User> doMostSimilarUsers(Object userID,
+ int howMany,
+ TopItems.Estimator<User> estimator) throws TasteException {
+ DataModel model = getDataModel();
+ User toUser = model.getUser(userID);
+ Collection<User> allUsers = new HashSet<User>(model.getNumUsers());
+ for (User user : model.getUsers()) {
+ allUsers.add(user);
+ }
+ allUsers.remove(toUser);
+ return TopItems.getTopUsers(howMany, allUsers, NullRescorer.getUserInstance(), estimator);
+ }
+
+ private double doEstimatePreference(User theUser, Collection<User> theNeighborhood, Item item)
+ throws TasteException {
+ if (theNeighborhood.isEmpty()) {
+ return Double.NaN;
+ }
+ double preference = 0.0;
+ double totalCorrelation = 0.0;
+ for (User user : theNeighborhood) {
+ if (!user.equals(theUser)) {
+ // See GenericItemBasedRecommender.doEstimatePreference() too
+ Preference pref = user.getPreferenceFor(item.getID());
+ if (pref != null) {
+ double theCorrelation = correlation.userCorrelation(theUser, user) + 1.0;
+ if (!Double.isNaN(theCorrelation)) {
+ preference += theCorrelation * pref.getValue();
+ totalCorrelation += theCorrelation;
+ }
+ }
+ }
+ }
+ return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+ }
+
+ private static Set<Item> getAllOtherItems(Iterable<User> theNeighborhood, User theUser) {
+ Set<Item> allItems = new HashSet<Item>();
+ for (User user : theNeighborhood) {
+ Preference[] prefs = user.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ Item item = prefs[i].getItem();
+ // If not already preferred by the user, add it
+ if (theUser.getPreferenceFor(item.getID()) == null) {
+ allItems.add(item);
+ }
+ }
+ }
+ return allItems;
+ }
+
+ @Override
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ super.refresh();
+ neighborhood.refresh();
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "GenericUserBasedRecommender[neighborhood:" + neighborhood + ']';
+ }
+
+ private static class MostSimilarEstimator implements TopItems.Estimator<User> {
+
+ private final User toUser;
+ private final UserCorrelation correlation;
+ private final Rescorer<Pair<User, User>> rescorer;
+
+ private MostSimilarEstimator(User toUser,
+ UserCorrelation correlation,
+ Rescorer<Pair<User, User>> rescorer) {
+ this.toUser = toUser;
+ this.correlation = correlation;
+ this.rescorer = rescorer;
+ }
+
+ public double estimate(User user) throws TasteException {
+ Pair<User, User> pair = new Pair<User, User>(toUser, user);
+ if (rescorer.isFiltered(pair)) {
+ return Double.NaN;
+ }
+ double originalEstimate = correlation.userCorrelation(toUser, user);
+ return rescorer.rescore(pair, originalEstimate);
+ }
+ }
+
+ private final class Estimator implements TopItems.Estimator<Item> {
+
+ private final User theUser;
+ private final Collection<User> theNeighborhood;
+
+ Estimator(User theUser, Collection<User> theNeighborhood) {
+ this.theUser = theUser;
+ this.theNeighborhood = theNeighborhood;
+ }
+
+ public double estimate(Item item) throws TasteException {
+ return doEstimatePreference(theUser, theNeighborhood, item);
+ }
+ }
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple recommender that always estimates preference for an {@link Item} to be the average of
+ * all known preference values for that {@link Item}. No information about {@link User}s is taken into
+ * account. This implementation is provided for experimentation; while simple and fast, it may not
+ * produce very good recommendations.</p>
+ */
+public final class ItemAverageRecommender extends AbstractRecommender {
+
+ private static final Logger log = Logger.getLogger(ItemAverageRecommender.class.getName());
+
+ private final Map<Object, RunningAverage> itemAverages;
+ private boolean averagesBuilt;
+ private final ReentrantLock refreshLock;
+ private final ReadWriteLock buildAveragesLock;
+
+ public ItemAverageRecommender(DataModel dataModel) {
+ super(dataModel);
+ this.itemAverages = new HashMap<Object, RunningAverage>(1003);
+ this.refreshLock = new ReentrantLock();
+ this.buildAveragesLock = new ReentrantReadWriteLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommending items for user ID '" + userID + '\'');
+ }
+ checkAverageDiffsBuilt();
+
+ User theUser = getDataModel().getUser(userID);
+ Set<Item> allItems = getAllOtherItems(theUser);
+
+ TopItems.Estimator<Item> estimator = new Estimator();
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommendations are: " + topItems);
+ }
+ return topItems;
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ DataModel model = getDataModel();
+ User theUser = model.getUser(userID);
+ Preference actualPref = theUser.getPreferenceFor(itemID);
+ if (actualPref != null) {
+ return actualPref.getValue();
+ }
+ checkAverageDiffsBuilt();
+ return doEstimatePreference(itemID);
+ }
+
+ private double doEstimatePreference(Object itemID) {
+ buildAveragesLock.readLock().lock();
+ try {
+ RunningAverage average = itemAverages.get(itemID);
+ return average == null ? Double.NaN : average.getAverage();
+ } finally {
+ buildAveragesLock.readLock().unlock();
+ }
+ }
+
+ private void checkAverageDiffsBuilt() throws TasteException {
+ if (!averagesBuilt) {
+ buildAverageDiffs();
+ }
+ }
+
+ private void buildAverageDiffs() throws TasteException {
+ try {
+ buildAveragesLock.writeLock().lock();
+ DataModel dataModel = getDataModel();
+ for (User user : dataModel.getUsers()) {
+ Preference[] prefs = user.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ Preference pref = prefs[i];
+ Object itemID = pref.getItem().getID();
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ average = new FullRunningAverage();
+ itemAverages.put(itemID, average);
+ }
+ average.addDatum(pref.getValue());
+ }
+ }
+ averagesBuilt = true;
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+ DataModel dataModel = getDataModel();
+ double prefDelta;
+ try {
+ User theUser = dataModel.getUser(userID);
+ Preference oldPref = theUser.getPreferenceFor(itemID);
+ prefDelta = oldPref == null ? value : value - oldPref.getValue();
+ } catch (NoSuchElementException nsee) {
+ prefDelta = value;
+ }
+ super.setPreference(userID, itemID, value);
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ RunningAverage newAverage = new FullRunningAverage();
+ newAverage.addDatum(prefDelta);
+ itemAverages.put(itemID, newAverage);
+ } else {
+ average.changeDatum(prefDelta);
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void removePreference(Object userID, Object itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ User theUser = dataModel.getUser(userID);
+ Preference oldPref = theUser.getPreferenceFor(itemID);
+ super.removePreference(userID, itemID);
+ if (oldPref != null) {
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+ } else {
+ average.removeDatum(oldPref.getValue());
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+ }
+
+ @Override
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ super.refresh();
+ try {
+ buildAverageDiffs();
+ } catch (TasteException te) {
+ log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+ }
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "ItemAverageRecommender";
+ }
+
+ private final class Estimator implements TopItems.Estimator<Item> {
+
+ public double estimate(Item item) {
+ return doEstimatePreference(item.getID());
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Like {@link ItemAverageRecommender}, except that estimated preferences are adjusted for the
+ * {@link User}s' average preference value. For example, say user X has not rated item Y. Item Y's
+ * average preference value is 3.5. User X's average preference value is 4.2, and the average over all
+ * preference values is 4.0. User X prefers items 0.2 higher on average, so, the estimated preference
+ * for user X, item Y is 3.5 + 0.2 = 3.7.</p>
+ */
+public final class ItemUserAverageRecommender extends AbstractRecommender {
+
+ private static final Logger log = Logger.getLogger(ItemUserAverageRecommender.class.getName());
+
+ private final Map<Object, RunningAverage> itemAverages;
+ private final Map<Object, RunningAverage> userAverages;
+ private final RunningAverage overallAveragePrefValue;
+ private boolean averagesBuilt;
+ private final ReentrantLock refreshLock;
+ private final ReadWriteLock buildAveragesLock;
+
+ public ItemUserAverageRecommender(DataModel dataModel) {
+ super(dataModel);
+ this.itemAverages = new HashMap<Object, RunningAverage>(1003);
+ this.userAverages = new HashMap<Object, RunningAverage>(1003);
+ this.overallAveragePrefValue = new FullRunningAverage();
+ this.refreshLock = new ReentrantLock();
+ this.buildAveragesLock = new ReentrantReadWriteLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommending items for user ID '" + userID + '\'');
+ }
+ checkAverageDiffsBuilt();
+
+ User theUser = getDataModel().getUser(userID);
+ Set<Item> allItems = getAllOtherItems(theUser);
+
+ TopItems.Estimator<Item> estimator = new Estimator(userID);
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommendations are: " + topItems);
+ }
+ return topItems;
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ DataModel model = getDataModel();
+ User theUser = model.getUser(userID);
+ Preference actualPref = theUser.getPreferenceFor(itemID);
+ if (actualPref != null) {
+ return actualPref.getValue();
+ }
+ checkAverageDiffsBuilt();
+ return doEstimatePreference(userID, itemID);
+ }
+
+ private double doEstimatePreference(Object userID, Object itemID) {
+ buildAveragesLock.readLock().lock();
+ try {
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ return Double.NaN;
+ }
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ return Double.NaN;
+ }
+ double userDiff = userAverage.getAverage() - overallAveragePrefValue.getAverage();
+ return itemAverage.getAverage() + userDiff;
+ } finally {
+ buildAveragesLock.readLock().unlock();
+ }
+ }
+
+ private void checkAverageDiffsBuilt() throws TasteException {
+ if (!averagesBuilt) {
+ buildAverageDiffs();
+ }
+ }
+
+ private void buildAverageDiffs() throws TasteException {
+ try {
+ buildAveragesLock.writeLock().lock();
+ DataModel dataModel = getDataModel();
+ for (User user : dataModel.getUsers()) {
+ Object userID = user.getID();
+ Preference[] prefs = user.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ Preference pref = prefs[i];
+ Object itemID = pref.getItem().getID();
+ double value = pref.getValue();
+ addDatumAndCrateIfNeeded(itemID, value, itemAverages);
+ addDatumAndCrateIfNeeded(userID, value, userAverages);
+ overallAveragePrefValue.addDatum(value);
+ }
+ }
+ averagesBuilt = true;
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ private static void addDatumAndCrateIfNeeded(Object itemID,
+ double value,
+ Map<Object, RunningAverage> averages) {
+ RunningAverage itemAverage = averages.get(itemID);
+ if (itemAverage == null) {
+ itemAverage = new FullRunningAverage();
+ averages.put(itemID, itemAverage);
+ }
+ itemAverage.addDatum(value);
+ }
+
+ @Override
+ public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+ DataModel dataModel = getDataModel();
+ double prefDelta;
+ try {
+ User theUser = dataModel.getUser(userID);
+ Preference oldPref = theUser.getPreferenceFor(itemID);
+ prefDelta = oldPref == null ? value : value - oldPref.getValue();
+ } catch (NoSuchElementException nsee) {
+ prefDelta = value;
+ }
+ super.setPreference(userID, itemID, value);
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ RunningAverage newItemAverage = new FullRunningAverage();
+ newItemAverage.addDatum(prefDelta);
+ itemAverages.put(itemID, newItemAverage);
+ } else {
+ itemAverage.changeDatum(prefDelta);
+ }
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ RunningAverage newUserAveragae = new FullRunningAverage();
+ newUserAveragae.addDatum(prefDelta);
+ userAverages.put(userID, newUserAveragae);
+ } else {
+ userAverage.changeDatum(prefDelta);
+ }
+ overallAveragePrefValue.changeDatum(prefDelta);
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void removePreference(Object userID, Object itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ User theUser = dataModel.getUser(userID);
+ Preference oldPref = theUser.getPreferenceFor(itemID);
+ super.removePreference(userID, itemID);
+ if (oldPref != null) {
+ double value = oldPref.getValue();
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+ }
+ itemAverage.removeDatum(value);
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ throw new IllegalStateException("No preferences exist for user ID: " + userID);
+ }
+ userAverage.removeDatum(value);
+ overallAveragePrefValue.removeDatum(value);
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+ }
+
+ @Override
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ super.refresh();
+ try {
+ buildAverageDiffs();
+ } catch (TasteException te) {
+ log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+ }
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "ItemUserAverageRecommender";
+ }
+
+ private final class Estimator implements TopItems.Estimator<Item> {
+
+ private final Object userID;
+
+ private Estimator(Object userID) {
+ this.userID = userID;
+ }
+
+ public double estimate(Item item) {
+ return doEstimatePreference(userID, item.getID());
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Fri May 9 14:35:12 2008
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Defines cluster similarity as the <em>largest</em> correlation between any two
+ * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
+ * when <em>some pair</em> of their members has high correlation.</p>
+ */
+public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
+
+ private final UserCorrelation correlation;
+ private final double samplingPercentage;
+
+ /**
+ * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+ * All user-user correlations are examined.</p>
+ *
+ * @param correlation
+ */
+ public NearestNeighborClusterSimilarity(UserCorrelation correlation) {
+ this(correlation, 1.0);
+ }
+
+ /**
+ * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+ * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
+ * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+ * of accuracy.</p>
+ *
+ * @param correlation
+ * @param samplingPercentage
+ */
+ public NearestNeighborClusterSimilarity(UserCorrelation correlation, double samplingPercentage) {
+ if (correlation == null) {
+ throw new IllegalArgumentException("correlation is null");
+ }
+ if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+ throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+ }
+ this.correlation = correlation;
+ this.samplingPercentage = samplingPercentage;
+ }
+
+ public double getSimilarity(Collection<User> cluster1,
+ Collection<User> cluster2) throws TasteException {
+ if (cluster1.isEmpty() || cluster2.isEmpty()) {
+ return Double.NaN;
+ }
+ double greatestCorrelation = Double.NEGATIVE_INFINITY;
+ for (User user1 : cluster1) {
+ if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+ for (User user2 : cluster2) {
+ double theCorrelation = correlation.userCorrelation(user1, user2);
+ if (theCorrelation > greatestCorrelation) {
+ greatestCorrelation = theCorrelation;
+ }
+ }
+ }
+ }
+ // We skipped everything? well, at least try comparing the first Users to get some value
+ if (greatestCorrelation == Double.NEGATIVE_INFINITY) {
+ return correlation.userCorrelation(cluster1.iterator().next(), cluster2.iterator().next());
+ }
+ return greatestCorrelation;
+ }
+
+ public void refresh() {
+ correlation.refresh();
+ }
+
+ @Override
+ public String toString() {
+ return "NearestNeighborClusterSimilarity[correlation:" + correlation + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java Fri May 9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+/**
+ * <p>A simple {@link Rescorer} which always returns the original score.</p>
+ */
+public final class NullRescorer<T> implements Rescorer<T> {
+
+ private static final Rescorer<Item> itemInstance = new NullRescorer<Item>();
+ private static final Rescorer<User> userInstance = new NullRescorer<User>();
+ private static final Rescorer<Pair<Item, Item>> itemItemPairInstance = new NullRescorer<Pair<Item, Item>>();
+ private static final Rescorer<Pair<User, User>> userUserPairInstance = new NullRescorer<Pair<User, User>>();
+
+ public static Rescorer<Item> getItemInstance() {
+ return itemInstance;
+ }
+
+ public static Rescorer<User> getUserInstance() {
+ return userInstance;
+ }
+
+ public static Rescorer<Pair<Item, Item>> getItemItemPairInstance() {
+ return itemItemPairInstance;
+ }
+
+ public static Rescorer<Pair<User, User>> getUserUserPairInstance() {
+ return userUserPairInstance;
+ }
+
+ private NullRescorer() {
+ // do nothing
+ }
+
+ /**
+ * @param thing to rescore
+ * @param originalScore current score for {@link Item}
+ * @return same originalScore as new score, always
+ */
+ public double rescore(T thing, double originalScore) {
+ return originalScore;
+ }
+
+ public boolean isFiltered(T thing) {
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return "NullRescorer";
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java Fri May 9 14:35:12 2008
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.correlation.GenericItemCorrelation;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * <p>A simple class that refactors the "find top N recommended items" logic that is used in
+ * several places in Taste.</p>
+ */
+public final class TopItems {
+
+ private TopItems() {
+ }
+
+ public static List<RecommendedItem> getTopItems(int howMany,
+ Iterable<Item> allItems,
+ Rescorer<Item> rescorer,
+ Estimator<Item> estimator) throws TasteException {
+ if (allItems == null || rescorer == null || estimator == null) {
+ throw new IllegalArgumentException("argument is null");
+ }
+ LinkedList<RecommendedItem> topItems = new LinkedList<RecommendedItem>();
+ boolean full = false;
+ for (Item item : allItems) {
+ if (item.isRecommendable() && !rescorer.isFiltered(item)) {
+ double preference = estimator.estimate(item);
+ double rescoredPref = rescorer.rescore(item, preference);
+ if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > topItems.getLast().getValue())) {
+ // I think this is faster than Collections.binarySearch() over a LinkedList since our
+ // comparisons are cheap, which binarySearch() economizes at the expense of more traversals.
+ // We also know that the right position tends to be at the end of the list.
+ ListIterator<RecommendedItem> iterator = topItems.listIterator(topItems.size());
+ while (iterator.hasPrevious()) {
+ if (rescoredPref <= iterator.previous().getValue()) {
+ iterator.next();
+ break;
+ }
+ }
+ iterator.add(new GenericRecommendedItem(item, rescoredPref));
+ if (full) {
+ topItems.removeLast();
+ } else if (topItems.size() > howMany) {
+ full = true;
+ topItems.removeLast();
+ }
+ }
+ }
+ }
+ return topItems;
+ }
+
+ public static List<User> getTopUsers(int howMany,
+ Iterable<User> allUsers,
+ Rescorer<User> rescorer,
+ Estimator<User> estimator) throws TasteException {
+ LinkedList<SimilarUser> topUsers = new LinkedList<SimilarUser>();
+ boolean full = false;
+ for (User user : allUsers) {
+ if (rescorer.isFiltered(user)) {
+ continue;
+ }
+ double similarity = estimator.estimate(user);
+ double rescoredSimilarity = rescorer.rescore(user, similarity);
+ if (!Double.isNaN(rescoredSimilarity) &&
+ (!full || rescoredSimilarity > topUsers.getLast().getSimilarity())) {
+ ListIterator<SimilarUser> iterator = topUsers.listIterator(topUsers.size());
+ while (iterator.hasPrevious()) {
+ if (rescoredSimilarity <= iterator.previous().getSimilarity()) {
+ iterator.next();
+ break;
+ }
+ }
+ iterator.add(new SimilarUser(user, similarity));
+ if (full) {
+ topUsers.removeLast();
+ } else if (topUsers.size() > howMany) {
+ full = true;
+ topUsers.removeLast();
+ }
+ }
+ }
+ List<User> result = new ArrayList<User>(topUsers.size());
+ for (SimilarUser similarUser : topUsers) {
+ result.add(similarUser.getUser());
+ }
+ return result;
+ }
+
+ /**
+ * <p>Thanks to tsmorton for suggesting this functionality and writing part of the code.</p>
+ *
+ * @see GenericItemCorrelation#GenericItemCorrelation(Iterable, int)
+ * @see GenericItemCorrelation#GenericItemCorrelation(org.apache.mahout.cf.taste.correlation.ItemCorrelation , org.apache.mahout.cf.taste.model.DataModel , int)
+ */
+ public static List<GenericItemCorrelation.ItemItemCorrelation> getTopItemItemCorrelations(
+ int howMany, Iterable<GenericItemCorrelation.ItemItemCorrelation> allCorrelations) {
+ LinkedList<GenericItemCorrelation.ItemItemCorrelation> topCorrelations =
+ new LinkedList<GenericItemCorrelation.ItemItemCorrelation>();
+ boolean full = false;
+ for (GenericItemCorrelation.ItemItemCorrelation correlation : allCorrelations) {
+ double value = correlation.getValue();
+ if (!full || value > topCorrelations.getLast().getValue()) {
+ ListIterator<GenericItemCorrelation.ItemItemCorrelation> iterator =
+ topCorrelations.listIterator(topCorrelations.size());
+ while (iterator.hasPrevious()) {
+ if (value <= iterator.previous().getValue()) {
+ iterator.next();
+ break;
+ }
+ }
+ iterator.add(correlation);
+ if (full) {
+ topCorrelations.removeLast();
+ } else if (topCorrelations.size() > howMany) {
+ full = true;
+ topCorrelations.removeLast();
+ }
+ }
+ }
+ return topCorrelations;
+ }
+
+ public static interface Estimator<T> {
+
+ double estimate(T thing) throws TasteException;
+ }
+
+ // Hmm, should this be exposed publicly like RecommendedItem?
+ private static class SimilarUser implements User {
+
+ private final User user;
+ private final double similarity;
+
+ private SimilarUser(User user, double similarity) {
+ this.user = user;
+ this.similarity = similarity;
+ }
+
+ public Object getID() {
+ return user.getID();
+ }
+
+ public Preference getPreferenceFor(Object itemID) {
+ return user.getPreferenceFor(itemID);
+ }
+
+ public Iterable<Preference> getPreferences() {
+ return user.getPreferences();
+ }
+
+ public Preference[] getPreferencesAsArray() {
+ return user.getPreferencesAsArray();
+ }
+
+ public User getUser() {
+ return user;
+ }
+
+ public double getSimilarity() {
+ return similarity;
+ }
+
+ @Override
+ public int hashCode() {
+ return user.hashCode() ^ Double.valueOf(similarity).hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof SimilarUser)) {
+ return false;
+ }
+ SimilarUser other = (SimilarUser) o;
+ return user.equals(other.user) && similarity == other.similarity;
+ }
+
+ public int compareTo(User user) {
+ return this.user.compareTo(user);
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link User}s, then determines
+ * the clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters
+ * until only a certain number remain, meaning that each cluster is sort of a tree of other clusters.</p>
+ *
+ * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p>
+ * <ul>
+ * <li>For all {@link User}s in a cluster, recommendations will be the same</li>
+ * <li>{@link #estimatePreference(Object, Object)} may well return {@link Double#NaN}; it does so when asked
+ * to estimate preference for an {@link Item} for which no preference is expressed in the {@link User}s in
+ * the cluster.</li>
+ * </ul>
+ */
+public final class TreeClusteringRecommender extends AbstractRecommender implements ClusteringRecommender {
+
+ private static final Logger log = Logger.getLogger(TreeClusteringRecommender.class.getName());
+
+ private final ClusterSimilarity clusterSimilarity;
+ private final int numClusters;
+ private final double clusteringThreshold;
+ private final boolean clusteringByThreshold;
+ private final double samplingPercentage;
+ private Map<Object, List<RecommendedItem>> topRecsByUserID;
+ private Collection<Collection<User>> allClusters;
+ private Map<Object, Collection<User>> clustersByUserID;
+ private boolean clustersBuilt;
+ private final ReentrantLock refreshLock;
+ private final ReentrantLock buildClustersLock;
+
+ /**
+ * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+ * @param numClusters desired number of clusters to create
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is
+ * less than 2
+ */
+ public TreeClusteringRecommender(DataModel dataModel,
+ ClusterSimilarity clusterSimilarity,
+ int numClusters) {
+ this(dataModel, clusterSimilarity, numClusters, 1.0);
+ }
+
+ /**
+ * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+ * @param numClusters desired number of clusters to create
+ * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+ * next-most-similar clusters. Decreasing this value from 1.0 can increase performance at the
+ * cost of accuracy
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is
+ * less than 2, or samplingPercentage is {@link Double#NaN} or nonpositive or greater than 1.0
+ */
+ public TreeClusteringRecommender(DataModel dataModel,
+ ClusterSimilarity clusterSimilarity,
+ int numClusters,
+ double samplingPercentage) {
+ super(dataModel);
+ if (clusterSimilarity == null) {
+ throw new IllegalArgumentException("clusterSimilarity is null");
+ }
+ if (numClusters < 2) {
+ throw new IllegalArgumentException("numClusters must be at least 2");
+ }
+ if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+ throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+ }
+ this.clusterSimilarity = clusterSimilarity;
+ this.numClusters = numClusters;
+ this.clusteringThreshold = Double.NaN;
+ this.clusteringByThreshold = false;
+ this.samplingPercentage = samplingPercentage;
+ this.refreshLock = new ReentrantLock();
+ this.buildClustersLock = new ReentrantLock();
+ }
+
+ /**
+ * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+ * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger
+ * clusters until the next two nearest clusters' similarity drops below this threshold
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is
+ * {@link Double#NaN}
+ */
+ public TreeClusteringRecommender(DataModel dataModel,
+ ClusterSimilarity clusterSimilarity,
+ double clusteringThreshold) {
+ this(dataModel, clusterSimilarity, clusteringThreshold, 1.0);
+ }
+
+ /**
+ * @param dataModel {@link DataModel} which provdes {@link User}s
+ * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+ * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger
+ * clusters until the next two nearest clusters' similarity drops below this threshold
+ * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+ * next-most-similar clusters. Decreasing this value from 1.0 can increase performance at the
+ * cost of accuracy
+ * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is
+ * {@link Double#NaN}, or samplingPercentage is {@link Double#NaN} or nonpositive or greater than 1.0
+ */
+ public TreeClusteringRecommender(DataModel dataModel,
+ ClusterSimilarity clusterSimilarity,
+ double clusteringThreshold,
+ double samplingPercentage) {
+ super(dataModel);
+ if (clusterSimilarity == null) {
+ throw new IllegalArgumentException("clusterSimilarity is null");
+ }
+ if (Double.isNaN(clusteringThreshold)) {
+ throw new IllegalArgumentException("clusteringThreshold must not be NaN");
+ }
+ if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+ throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+ }
+ this.clusterSimilarity = clusterSimilarity;
+ this.numClusters = Integer.MIN_VALUE;
+ this.clusteringThreshold = clusteringThreshold;
+ this.clusteringByThreshold = true;
+ this.samplingPercentage = samplingPercentage;
+ this.refreshLock = new ReentrantLock();
+ this.buildClustersLock = new ReentrantLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+ if (userID == null || rescorer == null) {
+ throw new IllegalArgumentException("userID or rescorer is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+ checkClustersBuilt();
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommending items for user ID '" + userID + '\'');
+ }
+
+ List<RecommendedItem> recommended = topRecsByUserID.get(userID);
+ if (recommended == null) {
+ return Collections.emptyList();
+ }
+
+ User theUser = getDataModel().getUser(userID);
+ List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
+ // Only add items the user doesn't already have a preference for.
+ // And that the rescorer doesn't "reject".
+ for (RecommendedItem recommendedItem : recommended) {
+ Item item = recommendedItem.getItem();
+ if (rescorer.isFiltered(item)) {
+ continue;
+ }
+ if (theUser.getPreferenceFor(item.getID()) == null &&
+ !Double.isNaN(rescorer.rescore(item, recommendedItem.getValue()))) {
+ rescored.add(recommendedItem);
+ }
+ }
+ Collections.sort(rescored, new ByRescoreComparator(rescorer));
+
+ return rescored;
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ if (userID == null || itemID == null) {
+ throw new IllegalArgumentException("userID or itemID is null");
+ }
+ DataModel model = getDataModel();
+ User theUser = model.getUser(userID);
+ Preference actualPref = theUser.getPreferenceFor(itemID);
+ if (actualPref != null) {
+ return actualPref.getValue();
+ }
+ checkClustersBuilt();
+ List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
+ if (topRecsForUser != null) {
+ for (RecommendedItem item : topRecsForUser) {
+ if (itemID.equals(item.getItem().getID())) {
+ return item.getValue();
+ }
+ }
+ }
+ // Hmm, we have no idea. The item is not in the user's cluster
+ return Double.NaN;
+ }
+
+ public Collection<User> getCluster(Object userID) throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("userID is null");
+ }
+ checkClustersBuilt();
+ Collection<User> cluster = clustersByUserID.get(userID);
+ if (cluster == null) {
+ return Collections.emptyList();
+ } else {
+ return cluster;
+ }
+ }
+
+ public Collection<Collection<User>> getClusters() throws TasteException {
+ checkClustersBuilt();
+ return allClusters;
+ }
+
+ private void checkClustersBuilt() throws TasteException {
+ if (!clustersBuilt) {
+ buildClusters();
+ }
+ }
+
+ private void buildClusters() throws TasteException {
+ try {
+ buildClustersLock.lock();
+ DataModel model = getDataModel();
+ int numUsers = model.getNumUsers();
+ if (numUsers > 0) {
+ List<Collection<User>> newClusters = new ArrayList<Collection<User>>(numUsers);
+ if (numUsers == 1) {
+ User onlyUser = model.getUsers().iterator().next();
+ newClusters.add(Collections.singleton(onlyUser));
+ } else {
+ // Begin with a cluster for each user:
+ for (User user : model.getUsers()) {
+ Collection<User> newCluster = new HashSet<User>();
+ newCluster.add(user);
+ newClusters.add(newCluster);
+ }
+ if (clusteringByThreshold) {
+ Pair<Collection<User>, Collection<User>> nearestPair = findNearestClusters(newClusters);
+ if (nearestPair != null) {
+ Collection<User> cluster1 = nearestPair.getFirst();
+ Collection<User> cluster2 = nearestPair.getSecond();
+ while (clusterSimilarity.getSimilarity(cluster1, cluster2) >= clusteringThreshold) {
+ newClusters.remove(cluster1);
+ newClusters.remove(cluster2);
+ Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
+ merged.addAll(cluster1);
+ merged.addAll(cluster2);
+ newClusters.add(merged);
+ nearestPair = findNearestClusters(newClusters);
+ if (nearestPair == null) {
+ break;
+ }
+ cluster1 = nearestPair.getFirst();
+ cluster2 = nearestPair.getSecond();
+ }
+ }
+ } else {
+ while (newClusters.size() > numClusters) {
+ Pair<Collection<User>, Collection<User>> nearestPair =
+ findNearestClusters(newClusters);
+ if (nearestPair == null) {
+ break;
+ }
+ Collection<User> cluster1 = nearestPair.getFirst();
+ Collection<User> cluster2 = nearestPair.getSecond();
+ newClusters.remove(cluster1);
+ newClusters.remove(cluster2);
+ Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
+ merged.addAll(cluster1);
+ merged.addAll(cluster2);
+ newClusters.add(merged);
+ }
+ }
+ }
+ topRecsByUserID = computeTopRecsPerUserID(newClusters);
+ clustersByUserID = computeClustersPerUserID(newClusters);
+ allClusters = newClusters;
+ } else {
+ topRecsByUserID = Collections.emptyMap();
+ clustersByUserID = Collections.emptyMap();
+ allClusters = Collections.emptySet();
+ }
+ clustersBuilt = true;
+ } finally {
+ buildClustersLock.unlock();
+ }
+ }
+
+ private Pair<Collection<User>, Collection<User>> findNearestClusters(List<Collection<User>> clusters)
+ throws TasteException {
+ int size = clusters.size();
+ Pair<Collection<User>, Collection<User>> nearestPair = null;
+ double bestSimilarity = Double.NEGATIVE_INFINITY;
+ Random r = RandomUtils.getRandom();
+ for (int i = 0; i < size; i++) {
+ Collection<User> cluster1 = clusters.get(i);
+ for (int j = i + 1; j < size; j++) {
+ if (samplingPercentage >= 1.0 || r.nextDouble() < samplingPercentage) {
+ Collection<User> cluster2 = clusters.get(j);
+ double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
+ if (!Double.isNaN(similarity) && similarity > bestSimilarity) {
+ bestSimilarity = similarity;
+ nearestPair = new Pair<Collection<User>, Collection<User>>(cluster1, cluster2);
+ }
+ }
+ }
+ }
+ return nearestPair;
+ }
+
+ private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(
+ Iterable<Collection<User>> clusters) throws TasteException {
+ Map<Object, List<RecommendedItem>> recsPerUser = new HashMap<Object, List<RecommendedItem>>();
+ for (Collection<User> cluster : clusters) {
+ List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
+ for (User user : cluster) {
+ recsPerUser.put(user.getID(), recs);
+ }
+ }
+ return Collections.unmodifiableMap(recsPerUser);
+ }
+
+ private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+ throws TasteException {
+
+ Collection<Item> allItems = new HashSet<Item>();
+ for (User user : cluster) {
+ Preference[] prefs = user.getPreferencesAsArray();
+ for (int i = 0; i < prefs.length; i++) {
+ allItems.add(prefs[i].getItem());
+ }
+ }
+
+ TopItems.Estimator<Item> estimator = new Estimator(cluster);
+
+ List<RecommendedItem> topItems =
+ TopItems.getTopItems(Integer.MAX_VALUE, allItems, NullRescorer.getItemInstance(), estimator);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Recommendations are: " + topItems);
+ }
+ return Collections.unmodifiableList(topItems);
+ }
+
+ private static Map<Object, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
+ Map<Object, Collection<User>> clustersPerUser = new HashMap<Object, Collection<User>>(clusters.size());
+ for (Collection<User> cluster : clusters) {
+ for (User user : cluster) {
+ clustersPerUser.put(user.getID(), cluster);
+ }
+ }
+ return clustersPerUser;
+ }
+
+ @Override
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ super.refresh();
+ clusterSimilarity.refresh();
+ try {
+ buildClusters();
+ } catch (TasteException te) {
+ log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+ }
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "TreeClusteringRecommender[clusterSimilarity:" + clusterSimilarity + ']';
+ }
+
+ private static class Estimator implements TopItems.Estimator<Item> {
+
+ private final Collection<User> cluster;
+
+ private Estimator(Collection<User> cluster) {
+ this.cluster = cluster;
+ }
+
+ public double estimate(Item item) {
+ RunningAverage average = new FullRunningAverage();
+ for (User user : cluster) {
+ Preference pref = user.getPreferenceFor(item.getID());
+ if (pref != null) {
+ average.addDatum(pref.getValue());
+ }
+ }
+ return average.getAverage();
+ }
+ }
+}