You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/04 14:29:45 UTC
[43/53] [abbrv] [partial] mahout git commit: end of day 6-2-2018
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
new file mode 100644
index 0000000..7ed8cc3
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
@@ -0,0 +1,251 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.Retriever;
+import org.apache.mahout.cf.taste.impl.model.PlusAnonymousUserDataModel;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.common.LongPair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A {@link Recommender} which caches the results from another {@link Recommender} in memory.
+ *
+ * TODO: Should be checked for thread safety
+ * </p>
+ */
+public final class CachingRecommender implements Recommender {
+
+ private static final Logger log = LoggerFactory.getLogger(CachingRecommender.class);
+
+ private final Recommender recommender;
+ private final int[] maxHowMany;
+ private final Retriever<Long,Recommendations> recommendationsRetriever;
+ private final Cache<Long,Recommendations> recommendationCache;
+ private final Cache<LongPair,Float> estimatedPrefCache;
+ private final RefreshHelper refreshHelper;
+ private IDRescorer currentRescorer;
+ private boolean currentlyIncludeKnownItems;
+
+ public CachingRecommender(Recommender recommender) throws TasteException {
+ Preconditions.checkArgument(recommender != null, "recommender is null");
+ this.recommender = recommender;
+ maxHowMany = new int[]{1};
+ // Use "num users" as an upper limit on cache size. Rough guess.
+ int numUsers = recommender.getDataModel().getNumUsers();
+ recommendationsRetriever = new RecommendationRetriever();
+ recommendationCache = new Cache<>(recommendationsRetriever, numUsers);
+ estimatedPrefCache = new Cache<>(new EstimatedPrefRetriever(), numUsers);
+ refreshHelper = new RefreshHelper(new Callable<Object>() {
+ @Override
+ public Object call() {
+ clear();
+ return null;
+ }
+ });
+ refreshHelper.addDependency(recommender);
+ }
+
+ private void setCurrentRescorer(IDRescorer rescorer) {
+ if (rescorer == null) {
+ if (currentRescorer != null) {
+ currentRescorer = null;
+ clear();
+ }
+ } else {
+ if (!rescorer.equals(currentRescorer)) {
+ currentRescorer = rescorer;
+ clear();
+ }
+ }
+ }
+
+ public void setCurrentlyIncludeKnownItems(boolean currentlyIncludeKnownItems) {
+ this.currentlyIncludeKnownItems = currentlyIncludeKnownItems;
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany) throws TasteException {
+ return recommend(userID, howMany, null, false);
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException {
+ return recommend(userID, howMany, null, includeKnownItems);
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany,IDRescorer rescorer) throws TasteException {
+ return recommend(userID, howMany, rescorer, false);
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany,IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+ synchronized (maxHowMany) {
+ if (howMany > maxHowMany[0]) {
+ maxHowMany[0] = howMany;
+ }
+ }
+
+ // Special case, avoid caching an anonymous user
+ if (userID == PlusAnonymousUserDataModel.TEMP_USER_ID) {
+ return recommendationsRetriever.get(PlusAnonymousUserDataModel.TEMP_USER_ID).getItems();
+ }
+
+ setCurrentRescorer(rescorer);
+ setCurrentlyIncludeKnownItems(includeKnownItems);
+
+ Recommendations recommendations = recommendationCache.get(userID);
+ if (recommendations.getItems().size() < howMany && !recommendations.isNoMoreRecommendableItems()) {
+ clear(userID);
+ recommendations = recommendationCache.get(userID);
+ if (recommendations.getItems().size() < howMany) {
+ recommendations.setNoMoreRecommendableItems(true);
+ }
+ }
+
+ List<RecommendedItem> recommendedItems = recommendations.getItems();
+ return recommendedItems.size() > howMany ? recommendedItems.subList(0, howMany) : recommendedItems;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) throws TasteException {
+ return estimatedPrefCache.get(new LongPair(userID, itemID));
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) throws TasteException {
+ recommender.setPreference(userID, itemID, value);
+ clear(userID);
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) throws TasteException {
+ recommender.removePreference(userID, itemID);
+ clear(userID);
+ }
+
+ @Override
+ public DataModel getDataModel() {
+ return recommender.getDataModel();
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+ /**
+ * <p>
+ * Clears cached recommendations for the given user.
+ * </p>
+ *
+ * @param userID
+ * clear cached data associated with this user ID
+ */
+ public void clear(final long userID) {
+ log.debug("Clearing recommendations for user ID '{}'", userID);
+ recommendationCache.remove(userID);
+ estimatedPrefCache.removeKeysMatching(new Cache.MatchPredicate<LongPair>() {
+ @Override
+ public boolean matches(LongPair userItemPair) {
+ return userItemPair.getFirst() == userID;
+ }
+ });
+ }
+
+ /**
+ * <p>
+ * Clears all cached recommendations.
+ * </p>
+ */
+ public void clear() {
+ log.debug("Clearing all recommendations...");
+ recommendationCache.clear();
+ estimatedPrefCache.clear();
+ }
+
+ @Override
+ public String toString() {
+ return "CachingRecommender[recommender:" + recommender + ']';
+ }
+
+ private final class RecommendationRetriever implements Retriever<Long,Recommendations> {
+ @Override
+ public Recommendations get(Long key) throws TasteException {
+ log.debug("Retrieving new recommendations for user ID '{}'", key);
+ int howMany = maxHowMany[0];
+ IDRescorer rescorer = currentRescorer;
+ List<RecommendedItem> recommendations =
+ rescorer == null ? recommender.recommend(key, howMany, null, currentlyIncludeKnownItems) :
+ recommender.recommend(key, howMany, rescorer, currentlyIncludeKnownItems);
+ return new Recommendations(Collections.unmodifiableList(recommendations));
+ }
+ }
+
+ private final class EstimatedPrefRetriever implements Retriever<LongPair,Float> {
+ @Override
+ public Float get(LongPair key) throws TasteException {
+ long userID = key.getFirst();
+ long itemID = key.getSecond();
+ log.debug("Retrieving estimated preference for user ID '{}' and item ID '{}'", userID, itemID);
+ return recommender.estimatePreference(userID, itemID);
+ }
+ }
+
+ private static final class Recommendations {
+
+ private final List<RecommendedItem> items;
+ private boolean noMoreRecommendableItems;
+
+ private Recommendations(List<RecommendedItem> items) {
+ this.items = items;
+ }
+
+ List<RecommendedItem> getItems() {
+ return items;
+ }
+
+ boolean isNoMoreRecommendableItems() {
+ return noMoreRecommendableItems;
+ }
+
+ void setNoMoreRecommendableItems(boolean noMoreRecommendableItems) {
+ this.noMoreRecommendableItems = noMoreRecommendableItems;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java
new file mode 100644
index 0000000..f0f389f
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Simple class which encapsulates restricting a preference value
+ * to a predefined range. The simple logic is wrapped up here for
+ * performance reasons.
+ */
+public final class EstimatedPreferenceCapper {
+
+ private final float min;
+ private final float max;
+
+ public EstimatedPreferenceCapper(DataModel model) {
+ min = model.getMinPreference();
+ max = model.getMaxPreference();
+ }
+
+ public float capEstimate(float estimate) {
+ if (estimate > max) {
+ estimate = max;
+ } else if (estimate < min) {
+ estimate = min;
+ }
+ return estimate;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
new file mode 100644
index 0000000..40e21a3
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+
+/**
+ * A variant on {@link GenericItemBasedRecommender} which is appropriate for use when no notion of preference
+ * value exists in the data.
+ *
+ * @see org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender
+ */
+public final class GenericBooleanPrefItemBasedRecommender extends GenericItemBasedRecommender {
+
+ public GenericBooleanPrefItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity) {
+ super(dataModel, similarity);
+ }
+
+ public GenericBooleanPrefItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity,
+ CandidateItemsStrategy candidateItemsStrategy, MostSimilarItemsCandidateItemsStrategy
+ mostSimilarItemsCandidateItemsStrategy) {
+ super(dataModel, similarity, candidateItemsStrategy, mostSimilarItemsCandidateItemsStrategy);
+ }
+
+ /**
+ * This computation is in a technical sense, wrong, since in the domain of "boolean preference users" where
+ * all preference values are 1, this method should only ever return 1.0 or NaN. This isn't terribly useful
+ * however since it means results can't be ranked by preference value (all are 1). So instead this returns a
+ * sum of similarities.
+ */
+ @Override
+ protected float doEstimatePreference(long userID, PreferenceArray preferencesFromUser, long itemID)
+ throws TasteException {
+ double[] similarities = getSimilarity().itemSimilarities(itemID, preferencesFromUser.getIDs());
+ boolean foundAPref = false;
+ double totalSimilarity = 0.0;
+ for (double theSimilarity : similarities) {
+ if (!Double.isNaN(theSimilarity)) {
+ foundAPref = true;
+ totalSimilarity += theSimilarity;
+ }
+ }
+ return foundAPref ? (float) totalSimilarity : Float.NaN;
+ }
+
+ @Override
+ public String toString() {
+ return "GenericBooleanPrefItemBasedRecommender";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java
new file mode 100644
index 0000000..15fcc9f
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * A variant on {@link GenericUserBasedRecommender} which is appropriate for use when no notion of preference
+ * value exists in the data.
+ */
+public final class GenericBooleanPrefUserBasedRecommender extends GenericUserBasedRecommender {
+
+ public GenericBooleanPrefUserBasedRecommender(DataModel dataModel,
+ UserNeighborhood neighborhood,
+ UserSimilarity similarity) {
+ super(dataModel, neighborhood, similarity);
+ }
+
+ /**
+ * This computation is in a technical sense, wrong, since in the domain of "boolean preference users" where
+ * all preference values are 1, this method should only ever return 1.0 or NaN. This isn't terribly useful
+ * however since it means results can't be ranked by preference value (all are 1). So instead this returns a
+ * sum of similarities to any other user in the neighborhood who has also rated the item.
+ */
+ @Override
+ protected float doEstimatePreference(long theUserID, long[] theNeighborhood, long itemID) throws TasteException {
+ if (theNeighborhood.length == 0) {
+ return Float.NaN;
+ }
+ DataModel dataModel = getDataModel();
+ UserSimilarity similarity = getSimilarity();
+ float totalSimilarity = 0.0f;
+ boolean foundAPref = false;
+ for (long userID : theNeighborhood) {
+ // See GenericItemBasedRecommender.doEstimatePreference() too
+ if (userID != theUserID && dataModel.getPreferenceValue(userID, itemID) != null) {
+ foundAPref = true;
+ totalSimilarity += (float) similarity.userSimilarity(theUserID, userID);
+ }
+ }
+ return foundAPref ? totalSimilarity : Float.NaN;
+ }
+
+ @Override
+ protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID, boolean includeKnownItems)
+ throws TasteException {
+ DataModel dataModel = getDataModel();
+ FastIDSet possibleItemIDs = new FastIDSet();
+ for (long userID : theNeighborhood) {
+ possibleItemIDs.addAll(dataModel.getItemIDsFromUser(userID));
+ }
+ if (!includeKnownItems) {
+ possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(theUserID));
+ }
+ return possibleItemIDs;
+ }
+
+ @Override
+ public String toString() {
+ return "GenericBooleanPrefUserBasedRecommender";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
new file mode 100644
index 0000000..6dc8aa5
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
@@ -0,0 +1,378 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.common.LongPair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple {@link org.apache.mahout.cf.taste.recommender.Recommender} which uses a given
+ * {@link org.apache.mahout.cf.taste.model.DataModel} and
+ * {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} to produce recommendations. This class
+ * represents Taste's support for item-based recommenders.
+ * </p>
+ *
+ * <p>
+ * The {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} is the most important point to discuss
+ * here. Item-based recommenders are useful because they can take advantage of something to be very fast: they
+ * base their computations on item similarity, not user similarity, and item similarity is relatively static.
+ * It can be precomputed, instead of re-computed in real time.
+ * </p>
+ *
+ * <p>
+ * Thus it's strongly recommended that you use
+ * {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity} with pre-computed similarities if
+ * you're going to use this class. You can use
+ * {@link org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity} too, which computes
+ * similarities in real-time, but will probably find this painfully slow for large amounts of data.
+ * </p>
+ */
+public class GenericItemBasedRecommender extends AbstractRecommender implements ItemBasedRecommender {
+
+ private static final Logger log = LoggerFactory.getLogger(GenericItemBasedRecommender.class);
+
+ private final ItemSimilarity similarity;
+ private final MostSimilarItemsCandidateItemsStrategy mostSimilarItemsCandidateItemsStrategy;
+ private final RefreshHelper refreshHelper;
+ private EstimatedPreferenceCapper capper;
+
+ private static final boolean EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT = true;
+
+ public GenericItemBasedRecommender(DataModel dataModel,
+ ItemSimilarity similarity,
+ CandidateItemsStrategy candidateItemsStrategy,
+ MostSimilarItemsCandidateItemsStrategy mostSimilarItemsCandidateItemsStrategy) {
+ super(dataModel, candidateItemsStrategy);
+ Preconditions.checkArgument(similarity != null, "similarity is null");
+ this.similarity = similarity;
+ Preconditions.checkArgument(mostSimilarItemsCandidateItemsStrategy != null,
+ "mostSimilarItemsCandidateItemsStrategy is null");
+ this.mostSimilarItemsCandidateItemsStrategy = mostSimilarItemsCandidateItemsStrategy;
+ this.refreshHelper = new RefreshHelper(new Callable<Void>() {
+ @Override
+ public Void call() {
+ capper = buildCapper();
+ return null;
+ }
+ });
+ refreshHelper.addDependency(dataModel);
+ refreshHelper.addDependency(similarity);
+ refreshHelper.addDependency(candidateItemsStrategy);
+ refreshHelper.addDependency(mostSimilarItemsCandidateItemsStrategy);
+ capper = buildCapper();
+ }
+
+ public GenericItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity) {
+ this(dataModel,
+ similarity,
+ AbstractRecommender.getDefaultCandidateItemsStrategy(),
+ getDefaultMostSimilarItemsCandidateItemsStrategy());
+ }
+
+ protected static MostSimilarItemsCandidateItemsStrategy getDefaultMostSimilarItemsCandidateItemsStrategy() {
+ return new PreferredItemsNeighborhoodCandidateItemsStrategy();
+ }
+
+ public ItemSimilarity getSimilarity() {
+ return similarity;
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+ log.debug("Recommending items for user ID '{}'", userID);
+
+ PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID);
+ if (preferencesFromUser.length() == 0) {
+ return Collections.emptyList();
+ }
+
+ FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems);
+
+ TopItems.Estimator<Long> estimator = new Estimator(userID, preferencesFromUser);
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer,
+ estimator);
+
+ log.debug("Recommendations are: {}", topItems);
+ return topItems;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) throws TasteException {
+ PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID);
+ Float actualPref = getPreferenceForItem(preferencesFromUser, itemID);
+ if (actualPref != null) {
+ return actualPref;
+ }
+ return doEstimatePreference(userID, preferencesFromUser, itemID);
+ }
+
+ private static Float getPreferenceForItem(PreferenceArray preferencesFromUser, long itemID) {
+ int size = preferencesFromUser.length();
+ for (int i = 0; i < size; i++) {
+ if (preferencesFromUser.getItemID(i) == itemID) {
+ return preferencesFromUser.getValue(i);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException {
+ return mostSimilarItems(itemID, howMany, null);
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long itemID, int howMany,
+ Rescorer<LongPair> rescorer) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MostSimilarEstimator(itemID, similarity, rescorer);
+ return doMostSimilarItems(new long[] {itemID}, howMany, estimator);
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, null,
+ EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT);
+ return doMostSimilarItems(itemIDs, howMany, estimator);
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany,
+ Rescorer<LongPair> rescorer) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, rescorer,
+ EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT);
+ return doMostSimilarItems(itemIDs, howMany, estimator);
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+ int howMany,
+ boolean excludeItemIfNotSimilarToAll) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, null,
+ excludeItemIfNotSimilarToAll);
+ return doMostSimilarItems(itemIDs, howMany, estimator);
+ }
+
+ @Override
+ public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany,
+ Rescorer<LongPair> rescorer,
+ boolean excludeItemIfNotSimilarToAll) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, rescorer,
+ excludeItemIfNotSimilarToAll);
+ return doMostSimilarItems(itemIDs, howMany, estimator);
+ }
+
+ @Override
+ public List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+
+ DataModel model = getDataModel();
+ TopItems.Estimator<Long> estimator = new RecommendedBecauseEstimator(userID, itemID);
+
+ PreferenceArray prefs = model.getPreferencesFromUser(userID);
+ int size = prefs.length();
+ FastIDSet allUserItems = new FastIDSet(size);
+ for (int i = 0; i < size; i++) {
+ allUserItems.add(prefs.getItemID(i));
+ }
+ allUserItems.remove(itemID);
+
+ return TopItems.getTopItems(howMany, allUserItems.iterator(), null, estimator);
+ }
+
+ private List<RecommendedItem> doMostSimilarItems(long[] itemIDs,
+ int howMany,
+ TopItems.Estimator<Long> estimator) throws TasteException {
+ FastIDSet possibleItemIDs = mostSimilarItemsCandidateItemsStrategy.getCandidateItems(itemIDs, getDataModel());
+ return TopItems.getTopItems(howMany, possibleItemIDs.iterator(), null, estimator);
+ }
+
+ protected float doEstimatePreference(long userID, PreferenceArray preferencesFromUser, long itemID)
+ throws TasteException {
+ double preference = 0.0;
+ double totalSimilarity = 0.0;
+ int count = 0;
+ double[] similarities = similarity.itemSimilarities(itemID, preferencesFromUser.getIDs());
+ for (int i = 0; i < similarities.length; i++) {
+ double theSimilarity = similarities[i];
+ if (!Double.isNaN(theSimilarity)) {
+ // Weights can be negative!
+ preference += theSimilarity * preferencesFromUser.getValue(i);
+ totalSimilarity += theSimilarity;
+ count++;
+ }
+ }
+ // Throw out the estimate if it was based on no data points, of course, but also if based on
+ // just one. This is a bit of a band-aid on the 'stock' item-based algorithm for the moment.
+ // The reason is that in this case the estimate is, simply, the user's rating for one item
+ // that happened to have a defined similarity. The similarity score doesn't matter, and that
+ // seems like a bad situation.
+ if (count <= 1) {
+ return Float.NaN;
+ }
+ float estimate = (float) (preference / totalSimilarity);
+ if (capper != null) {
+ estimate = capper.capEstimate(estimate);
+ }
+ return estimate;
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+ @Override
+ public String toString() {
+ return "GenericItemBasedRecommender[similarity:" + similarity + ']';
+ }
+
+ private EstimatedPreferenceCapper buildCapper() {
+ DataModel dataModel = getDataModel();
+ if (Float.isNaN(dataModel.getMinPreference()) && Float.isNaN(dataModel.getMaxPreference())) {
+ return null;
+ } else {
+ return new EstimatedPreferenceCapper(dataModel);
+ }
+ }
+
+ public static class MostSimilarEstimator implements TopItems.Estimator<Long> {
+
+ private final long toItemID;
+ private final ItemSimilarity similarity;
+ private final Rescorer<LongPair> rescorer;
+
+ public MostSimilarEstimator(long toItemID, ItemSimilarity similarity, Rescorer<LongPair> rescorer) {
+ this.toItemID = toItemID;
+ this.similarity = similarity;
+ this.rescorer = rescorer;
+ }
+
+ @Override
+ public double estimate(Long itemID) throws TasteException {
+ LongPair pair = new LongPair(toItemID, itemID);
+ if (rescorer != null && rescorer.isFiltered(pair)) {
+ return Double.NaN;
+ }
+ double originalEstimate = similarity.itemSimilarity(toItemID, itemID);
+ return rescorer == null ? originalEstimate : rescorer.rescore(pair, originalEstimate);
+ }
+ }
+
+ private final class Estimator implements TopItems.Estimator<Long> {
+
+ private final long userID;
+ private final PreferenceArray preferencesFromUser;
+
+ private Estimator(long userID, PreferenceArray preferencesFromUser) {
+ this.userID = userID;
+ this.preferencesFromUser = preferencesFromUser;
+ }
+
+ @Override
+ public double estimate(Long itemID) throws TasteException {
+ return doEstimatePreference(userID, preferencesFromUser, itemID);
+ }
+ }
+
+ private static final class MultiMostSimilarEstimator implements TopItems.Estimator<Long> {
+
+ private final long[] toItemIDs;
+ private final ItemSimilarity similarity;
+ private final Rescorer<LongPair> rescorer;
+ private final boolean excludeItemIfNotSimilarToAll;
+
+ private MultiMostSimilarEstimator(long[] toItemIDs, ItemSimilarity similarity, Rescorer<LongPair> rescorer,
+ boolean excludeItemIfNotSimilarToAll) {
+ this.toItemIDs = toItemIDs;
+ this.similarity = similarity;
+ this.rescorer = rescorer;
+ this.excludeItemIfNotSimilarToAll = excludeItemIfNotSimilarToAll;
+ }
+
+ @Override
+ public double estimate(Long itemID) throws TasteException {
+ RunningAverage average = new FullRunningAverage();
+ double[] similarities = similarity.itemSimilarities(itemID, toItemIDs);
+ for (int i = 0; i < toItemIDs.length; i++) {
+ long toItemID = toItemIDs[i];
+ LongPair pair = new LongPair(toItemID, itemID);
+ if (rescorer != null && rescorer.isFiltered(pair)) {
+ continue;
+ }
+ double estimate = similarities[i];
+ if (rescorer != null) {
+ estimate = rescorer.rescore(pair, estimate);
+ }
+ if (excludeItemIfNotSimilarToAll || !Double.isNaN(estimate)) {
+ average.addDatum(estimate);
+ }
+ }
+ double averageEstimate = average.getAverage();
+ return averageEstimate == 0 ? Double.NaN : averageEstimate;
+ }
+ }
+
+ private final class RecommendedBecauseEstimator implements TopItems.Estimator<Long> {
+
+ private final long userID;
+ private final long recommendedItemID;
+
+ private RecommendedBecauseEstimator(long userID, long recommendedItemID) {
+ this.userID = userID;
+ this.recommendedItemID = recommendedItemID;
+ }
+
+ @Override
+ public double estimate(Long itemID) throws TasteException {
+ Float pref = getDataModel().getPreferenceValue(userID, itemID);
+ if (pref == null) {
+ return Float.NaN;
+ }
+ double similarityValue = similarity.itemSimilarity(recommendedItemID, itemID);
+ return (1.0 + similarityValue) * pref;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
new file mode 100644
index 0000000..8c8f6ce
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.io.Serializable;
+
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.RandomUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple implementation of {@link RecommendedItem}.
+ * </p>
+ */
+public final class GenericRecommendedItem implements RecommendedItem, Serializable {
+
+ private final long itemID;
+ private final float value;
+
+ /**
+ * @throws IllegalArgumentException
+ * if item is null or value is NaN
+ */
+ public GenericRecommendedItem(long itemID, float value) {
+ Preconditions.checkArgument(!Float.isNaN(value), "value is NaN");
+ this.itemID = itemID;
+ this.value = value;
+ }
+
+ @Override
+ public long getItemID() {
+ return itemID;
+ }
+
+ @Override
+ public float getValue() {
+ return value;
+ }
+
+ @Override
+ public String toString() {
+ return "RecommendedItem[item:" + itemID + ", value:" + value + ']';
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) itemID ^ RandomUtils.hashFloat(value);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof GenericRecommendedItem)) {
+ return false;
+ }
+ RecommendedItem other = (RecommendedItem) o;
+ return itemID == other.getItemID() && value == other.getValue();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
new file mode 100644
index 0000000..1e2ef73
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
@@ -0,0 +1,247 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+import org.apache.mahout.common.LongPair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple {@link org.apache.mahout.cf.taste.recommender.Recommender}
+ * which uses a given {@link DataModel} and {@link UserNeighborhood} to produce recommendations.
+ * </p>
+ */
+public class GenericUserBasedRecommender extends AbstractRecommender implements UserBasedRecommender {
+
+ private static final Logger log = LoggerFactory.getLogger(GenericUserBasedRecommender.class);
+
+ private final UserNeighborhood neighborhood;
+ private final UserSimilarity similarity;
+ private final RefreshHelper refreshHelper;
+ private EstimatedPreferenceCapper capper;
+
+ public GenericUserBasedRecommender(DataModel dataModel,
+ UserNeighborhood neighborhood,
+ UserSimilarity similarity) {
+ super(dataModel);
+ Preconditions.checkArgument(neighborhood != null, "neighborhood is null");
+ this.neighborhood = neighborhood;
+ this.similarity = similarity;
+ this.refreshHelper = new RefreshHelper(new Callable<Void>() {
+ @Override
+ public Void call() {
+ capper = buildCapper();
+ return null;
+ }
+ });
+ refreshHelper.addDependency(dataModel);
+ refreshHelper.addDependency(similarity);
+ refreshHelper.addDependency(neighborhood);
+ capper = buildCapper();
+ }
+
+ public UserSimilarity getSimilarity() {
+ return similarity;
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+
+ log.debug("Recommending items for user ID '{}'", userID);
+
+ long[] theNeighborhood = neighborhood.getUserNeighborhood(userID);
+
+ if (theNeighborhood.length == 0) {
+ return Collections.emptyList();
+ }
+
+ FastIDSet allItemIDs = getAllOtherItems(theNeighborhood, userID, includeKnownItems);
+
+ TopItems.Estimator<Long> estimator = new Estimator(userID, theNeighborhood);
+
+ List<RecommendedItem> topItems = TopItems
+ .getTopItems(howMany, allItemIDs.iterator(), rescorer, estimator);
+
+ log.debug("Recommendations are: {}", topItems);
+ return topItems;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) throws TasteException {
+ DataModel model = getDataModel();
+ Float actualPref = model.getPreferenceValue(userID, itemID);
+ if (actualPref != null) {
+ return actualPref;
+ }
+ long[] theNeighborhood = neighborhood.getUserNeighborhood(userID);
+ return doEstimatePreference(userID, theNeighborhood, itemID);
+ }
+
+ @Override
+ public long[] mostSimilarUserIDs(long userID, int howMany) throws TasteException {
+ return mostSimilarUserIDs(userID, howMany, null);
+ }
+
+ @Override
+ public long[] mostSimilarUserIDs(long userID, int howMany, Rescorer<LongPair> rescorer) throws TasteException {
+ TopItems.Estimator<Long> estimator = new MostSimilarEstimator(userID, similarity, rescorer);
+ return doMostSimilarUsers(howMany, estimator);
+ }
+
+ private long[] doMostSimilarUsers(int howMany, TopItems.Estimator<Long> estimator) throws TasteException {
+ DataModel model = getDataModel();
+ return TopItems.getTopUsers(howMany, model.getUserIDs(), null, estimator);
+ }
+
+ protected float doEstimatePreference(long theUserID, long[] theNeighborhood, long itemID) throws TasteException {
+ if (theNeighborhood.length == 0) {
+ return Float.NaN;
+ }
+ DataModel dataModel = getDataModel();
+ double preference = 0.0;
+ double totalSimilarity = 0.0;
+ int count = 0;
+ for (long userID : theNeighborhood) {
+ if (userID != theUserID) {
+ // See GenericItemBasedRecommender.doEstimatePreference() too
+ Float pref = dataModel.getPreferenceValue(userID, itemID);
+ if (pref != null) {
+ double theSimilarity = similarity.userSimilarity(theUserID, userID);
+ if (!Double.isNaN(theSimilarity)) {
+ preference += theSimilarity * pref;
+ totalSimilarity += theSimilarity;
+ count++;
+ }
+ }
+ }
+ }
+ // Throw out the estimate if it was based on no data points, of course, but also if based on
+ // just one. This is a bit of a band-aid on the 'stock' item-based algorithm for the moment.
+ // The reason is that in this case the estimate is, simply, the user's rating for one item
+ // that happened to have a defined similarity. The similarity score doesn't matter, and that
+ // seems like a bad situation.
+ if (count <= 1) {
+ return Float.NaN;
+ }
+ float estimate = (float) (preference / totalSimilarity);
+ if (capper != null) {
+ estimate = capper.capEstimate(estimate);
+ }
+ return estimate;
+ }
+
+ protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID, boolean includeKnownItems)
+ throws TasteException {
+ DataModel dataModel = getDataModel();
+ FastIDSet possibleItemIDs = new FastIDSet();
+ for (long userID : theNeighborhood) {
+ possibleItemIDs.addAll(dataModel.getItemIDsFromUser(userID));
+ }
+ if (!includeKnownItems) {
+ possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(theUserID));
+ }
+ return possibleItemIDs;
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+ @Override
+ public String toString() {
+ return "GenericUserBasedRecommender[neighborhood:" + neighborhood + ']';
+ }
+
+ private EstimatedPreferenceCapper buildCapper() {
+ DataModel dataModel = getDataModel();
+ if (Float.isNaN(dataModel.getMinPreference()) && Float.isNaN(dataModel.getMaxPreference())) {
+ return null;
+ } else {
+ return new EstimatedPreferenceCapper(dataModel);
+ }
+ }
+
+ private static final class MostSimilarEstimator implements TopItems.Estimator<Long> {
+
+ private final long toUserID;
+ private final UserSimilarity similarity;
+ private final Rescorer<LongPair> rescorer;
+
+ private MostSimilarEstimator(long toUserID, UserSimilarity similarity, Rescorer<LongPair> rescorer) {
+ this.toUserID = toUserID;
+ this.similarity = similarity;
+ this.rescorer = rescorer;
+ }
+
+ @Override
+ public double estimate(Long userID) throws TasteException {
+ // Don't consider the user itself as a possible most similar user
+ if (userID == toUserID) {
+ return Double.NaN;
+ }
+ if (rescorer == null) {
+ return similarity.userSimilarity(toUserID, userID);
+ } else {
+ LongPair pair = new LongPair(toUserID, userID);
+ if (rescorer.isFiltered(pair)) {
+ return Double.NaN;
+ }
+ double originalEstimate = similarity.userSimilarity(toUserID, userID);
+ return rescorer.rescore(pair, originalEstimate);
+ }
+ }
+ }
+
+ private final class Estimator implements TopItems.Estimator<Long> {
+
+ private final long theUserID;
+ private final long[] theNeighborhood;
+
+ Estimator(long theUserID, long[] theNeighborhood) {
+ this.theUserID = theUserID;
+ this.theNeighborhood = theNeighborhood;
+ }
+
+ @Override
+ public double estimate(Long itemID) throws TasteException {
+ return doEstimatePreference(theUserID, theNeighborhood, itemID);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java
new file mode 100644
index 0000000..618c65f
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple recommender that always estimates preference for an item to be the average of all known preference
+ * values for that item. No information about users is taken into account. This implementation is provided for
+ * experimentation; while simple and fast, it may not produce very good recommendations.
+ * </p>
+ */
+public final class ItemAverageRecommender extends AbstractRecommender {
+
+ private static final Logger log = LoggerFactory.getLogger(ItemAverageRecommender.class);
+
+ private final FastByIDMap<RunningAverage> itemAverages;
+ private final ReadWriteLock buildAveragesLock;
+ private final RefreshHelper refreshHelper;
+
+ public ItemAverageRecommender(DataModel dataModel) throws TasteException {
+ super(dataModel);
+ this.itemAverages = new FastByIDMap<>();
+ this.buildAveragesLock = new ReentrantReadWriteLock();
+ this.refreshHelper = new RefreshHelper(new Callable<Object>() {
+ @Override
+ public Object call() throws TasteException {
+ buildAverageDiffs();
+ return null;
+ }
+ });
+ refreshHelper.addDependency(dataModel);
+ buildAverageDiffs();
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+ log.debug("Recommending items for user ID '{}'", userID);
+
+ PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID);
+ FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems);
+
+ TopItems.Estimator<Long> estimator = new Estimator();
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer,
+ estimator);
+
+ log.debug("Recommendations are: {}", topItems);
+ return topItems;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ Float actualPref = dataModel.getPreferenceValue(userID, itemID);
+ if (actualPref != null) {
+ return actualPref;
+ }
+ return doEstimatePreference(itemID);
+ }
+
+ private float doEstimatePreference(long itemID) {
+ buildAveragesLock.readLock().lock();
+ try {
+ RunningAverage average = itemAverages.get(itemID);
+ return average == null ? Float.NaN : (float) average.getAverage();
+ } finally {
+ buildAveragesLock.readLock().unlock();
+ }
+ }
+
+ private void buildAverageDiffs() throws TasteException {
+ try {
+ buildAveragesLock.writeLock().lock();
+ DataModel dataModel = getDataModel();
+ LongPrimitiveIterator it = dataModel.getUserIDs();
+ while (it.hasNext()) {
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(it.nextLong());
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ long itemID = prefs.getItemID(i);
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ average = new FullRunningAverage();
+ itemAverages.put(itemID, average);
+ }
+ average.addDatum(prefs.getValue(i));
+ }
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) throws TasteException {
+ DataModel dataModel = getDataModel();
+ double prefDelta;
+ try {
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+ prefDelta = oldPref == null ? value : value - oldPref;
+ } catch (NoSuchUserException nsee) {
+ prefDelta = value;
+ }
+ super.setPreference(userID, itemID, value);
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ RunningAverage newAverage = new FullRunningAverage();
+ newAverage.addDatum(prefDelta);
+ itemAverages.put(itemID, newAverage);
+ } else {
+ average.changeDatum(prefDelta);
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+ super.removePreference(userID, itemID);
+ if (oldPref != null) {
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage average = itemAverages.get(itemID);
+ if (average == null) {
+ throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+ } else {
+ average.removeDatum(oldPref);
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+ @Override
+ public String toString() {
+ return "ItemAverageRecommender";
+ }
+
+ private final class Estimator implements TopItems.Estimator<Long> {
+
+ @Override
+ public double estimate(Long itemID) {
+ return doEstimatePreference(itemID);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
new file mode 100644
index 0000000..b2bcd24
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Like {@link ItemAverageRecommender}, except that estimated preferences are adjusted for the users' average
+ * preference value. For example, say user X has not rated item Y. Item Y's average preference value is 3.5.
+ * User X's average preference value is 4.2, and the average over all preference values is 4.0. User X prefers
+ * items 0.2 higher on average, so, the estimated preference for user X, item Y is 3.5 + 0.2 = 3.7.
+ * </p>
+ */
+public final class ItemUserAverageRecommender extends AbstractRecommender {
+
+ private static final Logger log = LoggerFactory.getLogger(ItemUserAverageRecommender.class);
+
+ private final FastByIDMap<RunningAverage> itemAverages;
+ private final FastByIDMap<RunningAverage> userAverages;
+ private final RunningAverage overallAveragePrefValue;
+ private final ReadWriteLock buildAveragesLock;
+ private final RefreshHelper refreshHelper;
+
+ public ItemUserAverageRecommender(DataModel dataModel) throws TasteException {
+ super(dataModel);
+ this.itemAverages = new FastByIDMap<>();
+ this.userAverages = new FastByIDMap<>();
+ this.overallAveragePrefValue = new FullRunningAverage();
+ this.buildAveragesLock = new ReentrantReadWriteLock();
+ this.refreshHelper = new RefreshHelper(new Callable<Object>() {
+ @Override
+ public Object call() throws TasteException {
+ buildAverageDiffs();
+ return null;
+ }
+ });
+ refreshHelper.addDependency(dataModel);
+ buildAverageDiffs();
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
+ log.debug("Recommending items for user ID '{}'", userID);
+
+ PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID);
+ FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems);
+
+ TopItems.Estimator<Long> estimator = new Estimator(userID);
+
+ List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer,
+ estimator);
+
+ log.debug("Recommendations are: {}", topItems);
+ return topItems;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ Float actualPref = dataModel.getPreferenceValue(userID, itemID);
+ if (actualPref != null) {
+ return actualPref;
+ }
+ return doEstimatePreference(userID, itemID);
+ }
+
+ private float doEstimatePreference(long userID, long itemID) {
+ buildAveragesLock.readLock().lock();
+ try {
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ return Float.NaN;
+ }
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ return Float.NaN;
+ }
+ double userDiff = userAverage.getAverage() - overallAveragePrefValue.getAverage();
+ return (float) (itemAverage.getAverage() + userDiff);
+ } finally {
+ buildAveragesLock.readLock().unlock();
+ }
+ }
+
+ private void buildAverageDiffs() throws TasteException {
+ try {
+ buildAveragesLock.writeLock().lock();
+ DataModel dataModel = getDataModel();
+ LongPrimitiveIterator it = dataModel.getUserIDs();
+ while (it.hasNext()) {
+ long userID = it.nextLong();
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ long itemID = prefs.getItemID(i);
+ float value = prefs.getValue(i);
+ addDatumAndCreateIfNeeded(itemID, value, itemAverages);
+ addDatumAndCreateIfNeeded(userID, value, userAverages);
+ overallAveragePrefValue.addDatum(value);
+ }
+ }
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ private static void addDatumAndCreateIfNeeded(long itemID, float value, FastByIDMap<RunningAverage> averages) {
+ RunningAverage itemAverage = averages.get(itemID);
+ if (itemAverage == null) {
+ itemAverage = new FullRunningAverage();
+ averages.put(itemID, itemAverage);
+ }
+ itemAverage.addDatum(value);
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) throws TasteException {
+ DataModel dataModel = getDataModel();
+ double prefDelta;
+ try {
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+ prefDelta = oldPref == null ? value : value - oldPref;
+ } catch (NoSuchUserException nsee) {
+ prefDelta = value;
+ }
+ super.setPreference(userID, itemID, value);
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ RunningAverage newItemAverage = new FullRunningAverage();
+ newItemAverage.addDatum(prefDelta);
+ itemAverages.put(itemID, newItemAverage);
+ } else {
+ itemAverage.changeDatum(prefDelta);
+ }
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ RunningAverage newUserAveragae = new FullRunningAverage();
+ newUserAveragae.addDatum(prefDelta);
+ userAverages.put(userID, newUserAveragae);
+ } else {
+ userAverage.changeDatum(prefDelta);
+ }
+ overallAveragePrefValue.changeDatum(prefDelta);
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) throws TasteException {
+ DataModel dataModel = getDataModel();
+ Float oldPref = dataModel.getPreferenceValue(userID, itemID);
+ super.removePreference(userID, itemID);
+ if (oldPref != null) {
+ try {
+ buildAveragesLock.writeLock().lock();
+ RunningAverage itemAverage = itemAverages.get(itemID);
+ if (itemAverage == null) {
+ throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+ }
+ itemAverage.removeDatum(oldPref);
+ RunningAverage userAverage = userAverages.get(userID);
+ if (userAverage == null) {
+ throw new IllegalStateException("No preferences exist for user ID: " + userID);
+ }
+ userAverage.removeDatum(oldPref);
+ overallAveragePrefValue.removeDatum(oldPref);
+ } finally {
+ buildAveragesLock.writeLock().unlock();
+ }
+ }
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+ @Override
+ public String toString() {
+ return "ItemUserAverageRecommender";
+ }
+
+ private final class Estimator implements TopItems.Estimator<Long> {
+
+ private final long userID;
+
+ private Estimator(long userID) {
+ this.userID = userID;
+ }
+
+ @Override
+ public double estimate(Long itemID) {
+ return doEstimatePreference(userID, itemID);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java
new file mode 100644
index 0000000..e0eda7a
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.common.LongPair;
+
+/**
+ * <p>
+ * A simple {@link Rescorer} which always returns the original score.
+ * </p>
+ */
+public final class NullRescorer<T> implements Rescorer<T>, IDRescorer {
+
+ private static final IDRescorer USER_OR_ITEM_INSTANCE = new NullRescorer<>();
+ private static final Rescorer<LongPair> ITEM_ITEM_PAIR_INSTANCE = new NullRescorer<>();
+ private static final Rescorer<LongPair> USER_USER_PAIR_INSTANCE = new NullRescorer<>();
+
+ private NullRescorer() {
+ }
+
+ public static IDRescorer getItemInstance() {
+ return USER_OR_ITEM_INSTANCE;
+ }
+
+ public static IDRescorer getUserInstance() {
+ return USER_OR_ITEM_INSTANCE;
+ }
+
+ public static Rescorer<LongPair> getItemItemPairInstance() {
+ return ITEM_ITEM_PAIR_INSTANCE;
+ }
+
+ public static Rescorer<LongPair> getUserUserPairInstance() {
+ return USER_USER_PAIR_INSTANCE;
+ }
+
+ /**
+ * @param thing
+ * to rescore
+ * @param originalScore
+ * current score for item
+ * @return same originalScore as new score, always
+ */
+ @Override
+ public double rescore(T thing, double originalScore) {
+ return originalScore;
+ }
+
+ @Override
+ public boolean isFiltered(T thing) {
+ return false;
+ }
+
+ @Override
+ public double rescore(long id, double originalScore) {
+ return originalScore;
+ }
+
+ @Override
+ public boolean isFiltered(long id) {
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return "NullRescorer";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
new file mode 100644
index 0000000..6297d0b
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+public final class PreferredItemsNeighborhoodCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+ /**
+ * returns all items that have not been rated by the user and that were preferred by another user
+ * that has preferred at least one item that the current user has preferred too
+ */
+ @Override
+ protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+ throws TasteException {
+ FastIDSet possibleItemsIDs = new FastIDSet();
+ for (long itemID : preferredItemIDs) {
+ PreferenceArray itemPreferences = dataModel.getPreferencesForItem(itemID);
+ int numUsersPreferringItem = itemPreferences.length();
+ for (int index = 0; index < numUsersPreferringItem; index++) {
+ possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(itemPreferences.getUserID(index)));
+ }
+ }
+ if (!includeKnownItems) {
+ possibleItemsIDs.removeAll(preferredItemIDs);
+ }
+ return possibleItemsIDs;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
new file mode 100644
index 0000000..08aa5ae
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * Produces random recommendations and preference estimates. This is likely only useful as a novelty and for
+ * benchmarking.
+ */
+public final class RandomRecommender extends AbstractRecommender {
+
+ private final Random random = RandomUtils.getRandom();
+ private final float minPref;
+ private final float maxPref;
+
+ public RandomRecommender(DataModel dataModel) throws TasteException {
+ super(dataModel);
+ float maxPref = Float.NEGATIVE_INFINITY;
+ float minPref = Float.POSITIVE_INFINITY;
+ LongPrimitiveIterator userIterator = dataModel.getUserIDs();
+ while (userIterator.hasNext()) {
+ long userID = userIterator.next();
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+ for (int i = 0; i < prefs.length(); i++) {
+ float prefValue = prefs.getValue(i);
+ if (prefValue < minPref) {
+ minPref = prefValue;
+ }
+ if (prefValue > maxPref) {
+ maxPref = prefValue;
+ }
+ }
+ }
+ this.minPref = minPref;
+ this.maxPref = maxPref;
+ }
+
+ @Override
+ public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException {
+ DataModel dataModel = getDataModel();
+ int numItems = dataModel.getNumItems();
+ List<RecommendedItem> result = new ArrayList<>(howMany);
+ while (result.size() < howMany) {
+ LongPrimitiveIterator it = dataModel.getItemIDs();
+ it.skip(random.nextInt(numItems));
+ long itemID = it.next();
+ if (includeKnownItems || dataModel.getPreferenceValue(userID, itemID) == null) {
+ result.add(new GenericRecommendedItem(itemID, randomPref()));
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public float estimatePreference(long userID, long itemID) {
+ return randomPref();
+ }
+
+ private float randomPref() {
+ return minPref + random.nextFloat() * (maxPref - minPref);
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ getDataModel().refresh(alreadyRefreshed);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
new file mode 100644
index 0000000..623a60b
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import com.google.common.base.Preconditions;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.FixedSizeSamplingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Iterator;
+
+/**
+ * <p>Returns all items that have not been rated by the user <em>(3)</em> and that were preferred by another user
+ * <em>(2)</em> that has preferred at least one item <em>(1)</em> that the current user has preferred too.</p>
+ *
+ * <p>This strategy uses sampling to limit the number of items that are considered, by sampling three different
+ * things, noted above:</p>
+ *
+ * <ol>
+ * <li>The items that the user has preferred</li>
+ * <li>The users who also prefer each of those items</li>
+ * <li>The items those users also prefer</li>
+ * </ol>
+ *
+ * <p>There is a maximum associated with each of these three things; if the number of items or users exceeds
+ * that max, it is sampled so that the expected number of items or users actually used in that part of the
+ * computation is equal to the max.</p>
+ *
+ * <p>Three arguments control these three maxima. Each is a "factor" f, which establishes the max at
+ * f * log2(n), where n is the number of users or items in the data. For example if factor #2 is 5,
+ * which controls the number of users sampled per item, then 5 * log2(# users) is the maximum for this
+ * part of the computation.</p>
+ *
+ * <p>Each can be set to not do any limiting with value {@link #NO_LIMIT_FACTOR}.</p>
+ */
+public class SamplingCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+ private static final Logger log = LoggerFactory.getLogger(SamplingCandidateItemsStrategy.class);
+
+ /**
+ * Default factor used if not otherwise specified, for all limits. (30).
+ */
+ public static final int DEFAULT_FACTOR = 30;
+ /**
+ * Specify this value as a factor to mean no limit.
+ */
+ public static final int NO_LIMIT_FACTOR = Integer.MAX_VALUE;
+ private static final int MAX_LIMIT = Integer.MAX_VALUE;
+ private static final double LOG2 = Math.log(2.0);
+
+ private final int maxItems;
+ private final int maxUsersPerItem;
+ private final int maxItemsPerUser;
+
+ /**
+ * Defaults to using no limit ({@link #NO_LIMIT_FACTOR}) for all factors, except
+ * {@code candidatesPerUserFactor} which defaults to {@link #DEFAULT_FACTOR}.
+ *
+ * @see #SamplingCandidateItemsStrategy(int, int, int, int, int)
+ */
+ public SamplingCandidateItemsStrategy(int numUsers, int numItems) {
+ this(DEFAULT_FACTOR, DEFAULT_FACTOR, DEFAULT_FACTOR, numUsers, numItems);
+ }
+
+ /**
+ * @param itemsFactor factor controlling max items considered for a user
+ * @param usersPerItemFactor factor controlling max users considered for each of those items
+ * @param candidatesPerUserFactor factor controlling max candidate items considered from each of those users
+ * @param numUsers number of users currently in the data
+ * @param numItems number of items in the data
+ */
+ public SamplingCandidateItemsStrategy(int itemsFactor,
+ int usersPerItemFactor,
+ int candidatesPerUserFactor,
+ int numUsers,
+ int numItems) {
+ Preconditions.checkArgument(itemsFactor > 0, "itemsFactor must be greater then 0!");
+ Preconditions.checkArgument(usersPerItemFactor > 0, "usersPerItemFactor must be greater then 0!");
+ Preconditions.checkArgument(candidatesPerUserFactor > 0, "candidatesPerUserFactor must be greater then 0!");
+ Preconditions.checkArgument(numUsers > 0, "numUsers must be greater then 0!");
+ Preconditions.checkArgument(numItems > 0, "numItems must be greater then 0!");
+ maxItems = computeMaxFrom(itemsFactor, numItems);
+ maxUsersPerItem = computeMaxFrom(usersPerItemFactor, numUsers);
+ maxItemsPerUser = computeMaxFrom(candidatesPerUserFactor, numItems);
+ log.debug("maxItems {}, maxUsersPerItem {}, maxItemsPerUser {}", maxItems, maxUsersPerItem, maxItemsPerUser);
+ }
+
+ private static int computeMaxFrom(int factor, int numThings) {
+ if (factor == NO_LIMIT_FACTOR) {
+ return MAX_LIMIT;
+ }
+ long max = (long) (factor * (1.0 + Math.log(numThings) / LOG2));
+ return max > MAX_LIMIT ? MAX_LIMIT : (int) max;
+ }
+
+ @Override
+ protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+ throws TasteException {
+ LongPrimitiveIterator preferredItemIDsIterator = new LongPrimitiveArrayIterator(preferredItemIDs);
+ if (preferredItemIDs.length > maxItems) {
+ double samplingRate = (double) maxItems / preferredItemIDs.length;
+// log.info("preferredItemIDs.length {}, samplingRate {}", preferredItemIDs.length, samplingRate);
+ preferredItemIDsIterator =
+ new SamplingLongPrimitiveIterator(preferredItemIDsIterator, samplingRate);
+ }
+ FastIDSet possibleItemsIDs = new FastIDSet();
+ while (preferredItemIDsIterator.hasNext()) {
+ long itemID = preferredItemIDsIterator.nextLong();
+ PreferenceArray prefs = dataModel.getPreferencesForItem(itemID);
+ int prefsLength = prefs.length();
+ if (prefsLength > maxUsersPerItem) {
+ Iterator<Preference> sampledPrefs =
+ new FixedSizeSamplingIterator<>(maxUsersPerItem, prefs.iterator());
+ while (sampledPrefs.hasNext()) {
+ addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID()));
+ }
+ } else {
+ for (int i = 0; i < prefsLength; i++) {
+ addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(prefs.getUserID(i)));
+ }
+ }
+ }
+ if (!includeKnownItems) {
+ possibleItemsIDs.removeAll(preferredItemIDs);
+ }
+ return possibleItemsIDs;
+ }
+
+ private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
+ if (itemIDs.size() > maxItemsPerUser) {
+ LongPrimitiveIterator it =
+ new SamplingLongPrimitiveIterator(itemIDs.iterator(), (double) maxItemsPerUser / itemIDs.size());
+ while (it.hasNext()) {
+ possibleItemIDs.add(it.nextLong());
+ }
+ } else {
+ possibleItemIDs.addAll(itemIDs);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
new file mode 100644
index 0000000..c6d417f
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.common.RandomUtils;
+
+/** Simply encapsulates a user and a similarity value. */
+public final class SimilarUser implements Comparable<SimilarUser> {
+
+ private final long userID;
+ private final double similarity;
+
+ public SimilarUser(long userID, double similarity) {
+ this.userID = userID;
+ this.similarity = similarity;
+ }
+
+ long getUserID() {
+ return userID;
+ }
+
+ double getSimilarity() {
+ return similarity;
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) userID ^ RandomUtils.hashDouble(similarity);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof SimilarUser)) {
+ return false;
+ }
+ SimilarUser other = (SimilarUser) o;
+ return userID == other.getUserID() && similarity == other.getSimilarity();
+ }
+
+ @Override
+ public String toString() {
+ return "SimilarUser[user:" + userID + ", similarity:" + similarity + ']';
+ }
+
+ /** Defines an ordering from most similar to least similar. */
+ @Override
+ public int compareTo(SimilarUser other) {
+ double otherSimilarity = other.getSimilarity();
+ if (similarity > otherSimilarity) {
+ return -1;
+ }
+ if (similarity < otherSimilarity) {
+ return 1;
+ }
+ long otherUserID = other.getUserID();
+ if (userID < otherUserID) {
+ return -1;
+ }
+ if (userID > otherUserID) {
+ return 1;
+ }
+ return 0;
+ }
+
+}