You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/04 14:29:46 UTC
[44/53] [abbrv] [partial] mahout git commit: end of day 6-2-2018
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
new file mode 100644
index 0000000..546349b
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
@@ -0,0 +1,320 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * This {@link DataModel} decorator class is useful in a situation where you wish to recommend to a user that
+ * doesn't really exist yet in your actual {@link DataModel}. For example maybe you wish to recommend DVDs to
+ * a user who has browsed a few titles on your DVD store site, but, the user is not yet registered.
+ * </p>
+ *
+ * <p>
+ * This enables you to temporarily add a temporary user to an existing {@link DataModel} in a way that
+ * recommenders can then produce recommendations anyway. To do so, wrap your real implementation in this
+ * class:
+ * </p>
+ *
+ * <p>
+ *
+ * <pre>
+ * DataModel realModel = ...;
+ * DataModel plusModel = new PlusAnonymousUserDataModel(realModel);
+ * ...
+ * ItemSimilarity similarity = new LogLikelihoodSimilarity(realModel); // not plusModel
+ * </pre>
+ *
+ * </p>
+ *
+ * <p>
+ * But, you may continue to use {@code realModel} as input to other components. To recommend, first construct and
+ * set the temporary user information on the model and then simply call the recommender. The
+ * {@code synchronized} block exists to remind you that this is of course not thread-safe. Only one set
+ * of temp data can be inserted into the model and used at one time.
+ * </p>
+ *
+ * <p>
+ *
+ * <pre>
+ * Recommender recommender = ...;
+ * ...
+ * synchronized(...) {
+ * PreferenceArray tempPrefs = ...;
+ * plusModel.setTempPrefs(tempPrefs);
+ * recommender.recommend(PlusAnonymousUserDataModel.TEMP_USER_ID, 10);
+ * plusModel.clearTempPrefs();
+ * }
+ * </pre>
+ *
+ * </p>
+ */
+public class PlusAnonymousUserDataModel implements DataModel {
+
+ public static final long TEMP_USER_ID = Long.MIN_VALUE;
+
+ private final DataModel delegate;
+ private PreferenceArray tempPrefs;
+ private final FastIDSet prefItemIDs;
+
+ private static final Logger log = LoggerFactory.getLogger(PlusAnonymousUserDataModel.class);
+
+ public PlusAnonymousUserDataModel(DataModel delegate) {
+ this.delegate = delegate;
+ this.prefItemIDs = new FastIDSet();
+ }
+
+ protected DataModel getDelegate() {
+ return delegate;
+ }
+
+ public void setTempPrefs(PreferenceArray prefs) {
+ Preconditions.checkArgument(prefs != null && prefs.length() > 0, "prefs is null or empty");
+ this.tempPrefs = prefs;
+ this.prefItemIDs.clear();
+ for (int i = 0; i < prefs.length(); i++) {
+ this.prefItemIDs.add(prefs.getItemID(i));
+ }
+ }
+
+ public void clearTempPrefs() {
+ tempPrefs = null;
+ prefItemIDs.clear();
+ }
+
+ @Override
+ public LongPrimitiveIterator getUserIDs() throws TasteException {
+ if (tempPrefs == null) {
+ return delegate.getUserIDs();
+ }
+ return new PlusAnonymousUserLongPrimitiveIterator(delegate.getUserIDs(), TEMP_USER_ID);
+ }
+
+ @Override
+ public PreferenceArray getPreferencesFromUser(long userID) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ return tempPrefs;
+ }
+ return delegate.getPreferencesFromUser(userID);
+ }
+
+ @Override
+ public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ return prefItemIDs;
+ }
+ return delegate.getItemIDsFromUser(userID);
+ }
+
+ @Override
+ public LongPrimitiveIterator getItemIDs() throws TasteException {
+ return delegate.getItemIDs();
+ // Yeah ignoring items that only the plus-one user knows about... can't really happen
+ }
+
+ @Override
+ public PreferenceArray getPreferencesForItem(long itemID) throws TasteException {
+ if (tempPrefs == null) {
+ return delegate.getPreferencesForItem(itemID);
+ }
+ PreferenceArray delegatePrefs = null;
+ try {
+ delegatePrefs = delegate.getPreferencesForItem(itemID);
+ } catch (NoSuchItemException nsie) {
+ // OK. Probably an item that only the anonymous user has
+ if (log.isDebugEnabled()) {
+ log.debug("Item {} unknown", itemID);
+ }
+ }
+ for (int i = 0; i < tempPrefs.length(); i++) {
+ if (tempPrefs.getItemID(i) == itemID) {
+ return cloneAndMergeInto(delegatePrefs, itemID, tempPrefs.getUserID(i), tempPrefs.getValue(i));
+ }
+ }
+ if (delegatePrefs == null) {
+ // No, didn't find it among the anonymous user prefs
+ throw new NoSuchItemException(itemID);
+ }
+ return delegatePrefs;
+ }
+
+ private static PreferenceArray cloneAndMergeInto(PreferenceArray delegatePrefs,
+ long itemID,
+ long newUserID,
+ float value) {
+
+ int length = delegatePrefs == null ? 0 : delegatePrefs.length();
+ int newLength = length + 1;
+ PreferenceArray newPreferenceArray = new GenericItemPreferenceArray(newLength);
+
+ // Set item ID once
+ newPreferenceArray.setItemID(0, itemID);
+
+ int positionToInsert = 0;
+ while (positionToInsert < length && newUserID > delegatePrefs.getUserID(positionToInsert)) {
+ positionToInsert++;
+ }
+
+ for (int i = 0; i < positionToInsert; i++) {
+ newPreferenceArray.setUserID(i, delegatePrefs.getUserID(i));
+ newPreferenceArray.setValue(i, delegatePrefs.getValue(i));
+ }
+ newPreferenceArray.setUserID(positionToInsert, newUserID);
+ newPreferenceArray.setValue(positionToInsert, value);
+ for (int i = positionToInsert + 1; i < newLength; i++) {
+ newPreferenceArray.setUserID(i, delegatePrefs.getUserID(i - 1));
+ newPreferenceArray.setValue(i, delegatePrefs.getValue(i - 1));
+ }
+
+ return newPreferenceArray;
+ }
+
+ @Override
+ public Float getPreferenceValue(long userID, long itemID) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ for (int i = 0; i < tempPrefs.length(); i++) {
+ if (tempPrefs.getItemID(i) == itemID) {
+ return tempPrefs.getValue(i);
+ }
+ }
+ return null;
+ }
+ return delegate.getPreferenceValue(userID, itemID);
+ }
+
+ @Override
+ public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ return null;
+ }
+ return delegate.getPreferenceTime(userID, itemID);
+ }
+
+ @Override
+ public int getNumItems() throws TasteException {
+ return delegate.getNumItems();
+ }
+
+ @Override
+ public int getNumUsers() throws TasteException {
+ return delegate.getNumUsers() + (tempPrefs == null ? 0 : 1);
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID) throws TasteException {
+ if (tempPrefs == null) {
+ return delegate.getNumUsersWithPreferenceFor(itemID);
+ }
+ boolean found = false;
+ for (int i = 0; i < tempPrefs.length(); i++) {
+ if (tempPrefs.getItemID(i) == itemID) {
+ found = true;
+ break;
+ }
+ }
+ return delegate.getNumUsersWithPreferenceFor(itemID) + (found ? 1 : 0);
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException {
+ if (tempPrefs == null) {
+ return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2);
+ }
+ boolean found1 = false;
+ boolean found2 = false;
+ for (int i = 0; i < tempPrefs.length() && !(found1 && found2); i++) {
+ long itemID = tempPrefs.getItemID(i);
+ if (itemID == itemID1) {
+ found1 = true;
+ }
+ if (itemID == itemID2) {
+ found2 = true;
+ }
+ }
+ return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2) + (found1 && found2 ? 1 : 0);
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ throw new UnsupportedOperationException();
+ }
+ delegate.setPreference(userID, itemID, value);
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) throws TasteException {
+ if (userID == TEMP_USER_ID) {
+ if (tempPrefs == null) {
+ throw new NoSuchUserException(TEMP_USER_ID);
+ }
+ throw new UnsupportedOperationException();
+ }
+ delegate.removePreference(userID, itemID);
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ delegate.refresh(alreadyRefreshed);
+ }
+
+ @Override
+ public boolean hasPreferenceValues() {
+ return delegate.hasPreferenceValues();
+ }
+
+ @Override
+ public float getMaxPreference() {
+ return delegate.getMaxPreference();
+ }
+
+ @Override
+ public float getMinPreference() {
+ return delegate.getMinPreference();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
new file mode 100644
index 0000000..ea4df85
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.impl.common.AbstractLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+
+final class PlusAnonymousUserLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
+ // Wraps a delegate iterator and merges one extra long into its sequence (delegate assumed ascending -- TODO confirm)
+ private final LongPrimitiveIterator delegate;
+ private final long extraDatum; // the single additional value to emit
+ private boolean datumConsumed; // true once nextLong() has returned extraDatum
+
+ PlusAnonymousUserLongPrimitiveIterator(LongPrimitiveIterator delegate, long extraDatum) {
+ this.delegate = delegate;
+ this.extraDatum = extraDatum;
+ datumConsumed = false;
+ }
+
+ @Override
+ public long nextLong() {
+ if (datumConsumed) {
+ return delegate.nextLong();
+ } else {
+ if (delegate.hasNext()) {
+ long delegateNext = delegate.peek();
+ if (extraDatum <= delegateNext) { // emit the extra value before the first delegate value >= it
+ datumConsumed = true;
+ return extraDatum;
+ } else {
+ return delegate.nextLong(); // stay primitive like the branch above; next() would box
+ }
+ } else {
+ datumConsumed = true; // delegate exhausted: the extra value comes last
+ return extraDatum;
+ }
+ }
+ }
+
+ @Override
+ public long peek() { // same decision as nextLong(), but consumes nothing
+ if (datumConsumed) {
+ return delegate.peek();
+ } else {
+ if (delegate.hasNext()) {
+ long delegateNext = delegate.peek();
+ if (extraDatum <= delegateNext) {
+ return extraDatum;
+ } else {
+ return delegateNext;
+ }
+ } else {
+ return extraDatum;
+ }
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !datumConsumed || delegate.hasNext(); // the extra value always remains until consumed
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void skip(int n) {
+ for (int i = 0; i < n && hasNext(); i++) { // stop quietly at the end instead of running past it
+ nextLong();
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
new file mode 100644
index 0000000..0399618
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
@@ -0,0 +1,758 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.model.AbstractDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.FileLineIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * A {@link DataModel} backed by a delimited file. This class expects a file where each line
+ * contains a user ID, followed by item ID, followed by optional preference value, followed by
+ * optional timestamp. Commas or tabs delimit fields:
+ * </p>
+ *
+ * <p>{@code userID,itemID[,preference[,timestamp]]}</p>
+ *
+ * <p>
+ * Preference value is optional to accommodate applications that have no notion of a
+ * preference value (that is, the user simply expresses a
+ * preference for an item, but no degree of preference).
+ * </p>
+ *
+ * <p>
+ * The preference value is assumed to be parseable as a {@code double}. The user IDs and item IDs are
+ * parsed as {@code long}s. The timestamp, if present, is assumed to be parseable as a
+ * {@code long}, though this can be overridden via {@link #readTimestampFromString(String)}.
+ * The preference value may be empty, to indicate "no preference value", but its field cannot be omitted
+ * when a timestamp follows. That is, this is legal:
+ * </p>
+ *
+ * <p>{@code 123,456,,129050099059}</p>
+ *
+ * <p>But this isn't:</p>
+ *
+ * <p>{@code 123,456,129050099059}</p>
+ *
+ * <p>
+ * It is also acceptable for the lines to contain additional fields. Fields beyond the fourth will be ignored.
+ * An empty line, or one that begins with '#' will be ignored as a comment.
+ * </p>
+ *
+ * <p>
+ * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file
+ * has been reloaded very recently already.
+ * </p>
+ *
+ * <p>
+ * This class will also look for update "delta" files in the same directory, with file names that start the
+ * same way (up to the first period). These files have the same format, and provide updated data that
+ * supersedes what is in the main data file. This is a mechanism that allows an application to push updates to
+ * {@link FileDataModel} without re-copying the entire data file.
+ * </p>
+ *
+ * <p>
+ * One small format difference exists. Update files must also be able to express deletes.
+ * This is done by ending with a blank preference value, as in "123,456,".
+ * </p>
+ *
+ * <p>
+ * Note that it's all-or-nothing -- all of the items in the file must express no preference, or they all must.
+ * These cannot be mixed. Put another way there will always be the same number of delimiters on every line of
+ * the file!
+ * </p>
+ *
+ * <p>
+ * This class is not intended for use with very large amounts of data (over, say, tens of millions of rows).
+ * For that, a JDBC-backed {@link DataModel} and a database are more appropriate.
+ * </p>
+ *
+ * <p>
+ * It is possible and likely useful to subclass this class and customize its behavior to accommodate
+ * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap, FastByIDMap, boolean)} and
+ * {@link #processLineWithoutID(String, FastByIDMap, FastByIDMap)}
+ */
+public class FileDataModel extends AbstractDataModel {
+
+ private static final Logger log = LoggerFactory.getLogger(FileDataModel.class);
+
+ public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
+ private static final char COMMENT_CHAR = '#';
+ private static final char[] DELIMIETERS = {',', '\t'};
+
+ private final File dataFile;
+ private long lastModified;
+ private long lastUpdateFileModified;
+ private final transient Splitter delimiterPattern;
+ private final boolean hasPrefValues;
+ private DataModel delegate;
+ private final ReentrantLock reloadLock;
+ private final boolean transpose;
+ private final long minReloadIntervalMS;
+
+ /**
+ * @param dataFile
+ * file containing preferences data. If the file is compressed (and its name ends in .gz or .zip
+ * accordingly), it will be decompressed as it is read
+ * @throws FileNotFoundException
+ * if dataFile does not exist
+ * @throws IOException
+ * if file can't be read
+ */
+ public FileDataModel(File dataFile) throws IOException {
+ this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+ }
+
+ /**
+ * @param delimiterRegex If your data file doesn't use '\t' or ',' as its delimiter, you can specify
+ * a custom regex pattern.
+ */
+ public FileDataModel(File dataFile, String delimiterRegex) throws IOException {
+ this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS, delimiterRegex);
+ }
+
+ /**
+ * @param transpose
+ * transposes user IDs and item IDs -- convenient for 'flipping' the data model this way
+ * @param minReloadIntervalMS
+ * the minimum interval in milliseconds after which a full reload of the original datafile is done
+ * when refresh() is called
+ * @see #FileDataModel(File)
+ */
+ public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS) throws IOException {
+ this(dataFile, transpose, minReloadIntervalMS, null);
+ }
+
+ /**
+ * @param delimiterRegex If your data file doesn't use '\t' or ',' as delimiters, you can specify
+ * your own using a regex pattern. If null, the delimiter is detected from the first data line.
+ * @throws IOException if the file does not exist, is a directory, or can't be read
+ */
+ public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS, String delimiterRegex)
+ throws IOException {
+ // Check the argument itself: getAbsoluteFile() never returns null, so checking its result was a no-op
+ this.dataFile = Preconditions.checkNotNull(dataFile, "dataFile is null").getAbsoluteFile();
+ if (!dataFile.exists() || dataFile.isDirectory()) {
+ throw new FileNotFoundException(dataFile.toString());
+ }
+ Preconditions.checkArgument(dataFile.length() > 0L, "dataFile is empty");
+ Preconditions.checkArgument(minReloadIntervalMS >= 0L, "minReloadIntervalMs must be non-negative");
+
+ log.info("Creating FileDataModel for file {}", dataFile);
+ this.lastModified = dataFile.lastModified();
+ this.lastUpdateFileModified = readLastUpdateFileModified();
+
+ FileLineIterator iterator = new FileLineIterator(dataFile, false);
+ String firstLine;
+ try {
+ firstLine = iterator.peek();
+ while (firstLine.isEmpty() || firstLine.charAt(0) == COMMENT_CHAR) {
+ iterator.next();
+ firstLine = iterator.peek();
+ }
+ } finally { // close even when peek()/next() throws, so the file handle is not leaked
+ Closeables.close(iterator, true);
+ }
+
+ if (delimiterRegex == null) {
+ delimiterPattern = Splitter.on(determineDelimiter(firstLine));
+ } else {
+ delimiterPattern = Splitter.onPattern(delimiterRegex);
+ if (!delimiterPattern.split(firstLine).iterator().hasNext()) {
+ throw new IllegalArgumentException("Did not find a delimiter(pattern) in first line");
+ }
+ }
+ List<String> firstLineSplit = new ArrayList<>();
+ for (String token : delimiterPattern.split(firstLine)) {
+ firstLineSplit.add(token);
+ }
+ // If preference value exists and isn't empty then the file is specifying pref values
+ hasPrefValues = firstLineSplit.size() >= 3 && !firstLineSplit.get(2).isEmpty();
+
+ this.reloadLock = new ReentrantLock();
+ this.transpose = transpose;
+ this.minReloadIntervalMS = minReloadIntervalMS;
+
+ reload();
+ }
+
+ public File getDataFile() {
+ return dataFile;
+ }
+
+ protected void reload() {
+ if (reloadLock.tryLock()) {
+ try {
+ delegate = buildModel();
+ } catch (IOException ioe) {
+ log.warn("Exception while reloading", ioe);
+ } finally {
+ reloadLock.unlock();
+ }
+ }
+ }
+
+ protected DataModel buildModel() throws IOException {
+
+ long newLastModified = dataFile.lastModified();
+ long newLastUpdateFileModified = readLastUpdateFileModified();
+
+ boolean loadFreshData = delegate == null || newLastModified > lastModified + minReloadIntervalMS;
+
+ long oldLastUpdateFileModifieid = lastUpdateFileModified;
+ lastModified = newLastModified;
+ lastUpdateFileModified = newLastUpdateFileModified;
+
+ FastByIDMap<FastByIDMap<Long>> timestamps = new FastByIDMap<>();
+
+ if (hasPrefValues) {
+
+ if (loadFreshData) {
+
+ FastByIDMap<Collection<Preference>> data = new FastByIDMap<>();
+ FileLineIterator iterator = new FileLineIterator(dataFile, false);
+ processFile(iterator, data, timestamps, false);
+
+ for (File updateFile : findUpdateFilesAfter(newLastModified)) {
+ processFile(new FileLineIterator(updateFile, false), data, timestamps, false);
+ }
+
+ return new GenericDataModel(GenericDataModel.toDataMap(data, true), timestamps);
+
+ } else {
+
+ FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();
+
+ for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
+ processFile(new FileLineIterator(updateFile, false), rawData, timestamps, true);
+ }
+
+ return new GenericDataModel(rawData, timestamps);
+
+ }
+
+ } else {
+
+ if (loadFreshData) {
+
+ FastByIDMap<FastIDSet> data = new FastByIDMap<>();
+ FileLineIterator iterator = new FileLineIterator(dataFile, false);
+ processFileWithoutID(iterator, data, timestamps);
+
+ for (File updateFile : findUpdateFilesAfter(newLastModified)) {
+ processFileWithoutID(new FileLineIterator(updateFile, false), data, timestamps);
+ }
+
+ return new GenericBooleanPrefDataModel(data, timestamps);
+
+ } else {
+
+ FastByIDMap<FastIDSet> rawData = ((GenericBooleanPrefDataModel) delegate).getRawUserData();
+
+ for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
+ processFileWithoutID(new FileLineIterator(updateFile, false), rawData, timestamps);
+ }
+
+ return new GenericBooleanPrefDataModel(rawData, timestamps);
+
+ }
+
+ }
+ }
+
+ /**
+ * Finds update delta files in the same directory as the data file: any non-directory file whose name starts
+ * the same way as the data file (up to first period) but isn't the data file itself. For example, if the
+ * data file is /foo/data.txt.gz, you might place update files at /foo/data.1.txt.gz, /foo/data.2.txt.gz,
+ * etc. Only files with lastModified >= minimumLastModified are returned, ordered by modification time.
+ */
+ private Iterable<File> findUpdateFilesAfter(long minimumLastModified) {
+ String dataFileName = dataFile.getName();
+ int period = dataFileName.indexOf('.');
+ String startName = period < 0 ? dataFileName : dataFileName.substring(0, period); // whole name if no period
+ File parentDir = dataFile.getParentFile();
+ Map<Long, File> modTimeToUpdateFile = new TreeMap<>(); // NOTE(review): equal mod times collide; later put wins
+ FileFilter onlyFiles = new FileFilter() {
+ @Override
+ public boolean accept(File file) {
+ return !file.isDirectory();
+ }
+ };
+ for (File updateFile : parentDir.listFiles(onlyFiles)) { // NOTE(review): listFiles may return null; unguarded
+ String updateFileName = updateFile.getName();
+ if (updateFileName.startsWith(startName)
+ && !updateFileName.equals(dataFileName)
+ && updateFile.lastModified() >= minimumLastModified) {
+ modTimeToUpdateFile.put(updateFile.lastModified(), updateFile);
+ }
+ }
+ return modTimeToUpdateFile.values(); // TreeMap iteration order: ascending modification time
+ }
+
+ private long readLastUpdateFileModified() {
+ long mostRecentModification = Long.MIN_VALUE;
+ for (File updateFile : findUpdateFilesAfter(0L)) {
+ mostRecentModification = Math.max(mostRecentModification, updateFile.lastModified());
+ }
+ return mostRecentModification;
+ }
+
+ public static char determineDelimiter(String line) {
+ for (char possibleDelimieter : DELIMIETERS) {
+ if (line.indexOf(possibleDelimieter) >= 0) {
+ return possibleDelimieter;
+ }
+ }
+ throw new IllegalArgumentException("Did not find a delimiter in first line");
+ }
+
+ protected void processFile(FileLineIterator dataOrUpdateFileIterator,
+ FastByIDMap<?> data,
+ FastByIDMap<FastByIDMap<Long>> timestamps,
+ boolean fromPriorData) {
+ log.info("Reading file info...");
+ int count = 0;
+ while (dataOrUpdateFileIterator.hasNext()) {
+ String line = dataOrUpdateFileIterator.next();
+ if (!line.isEmpty()) {
+ processLine(line, data, timestamps, fromPriorData);
+ if (++count % 1000000 == 0) {
+ log.info("Processed {} lines", count);
+ }
+ }
+ }
+ log.info("Read lines: {}", count);
+ }
+
+ /**
+ * <p>
+ * Reads one line from the input file and adds the data to a {@link FastByIDMap} data structure which maps user IDs
+ * to preferences. This assumes that each line of the input file corresponds to one preference. After
+ * reading a line and determining which user and item the preference pertains to, the method should look to
+ * see if the data contains a mapping for the user ID already, and if not, add an empty data structure of preferences
+ * as appropriate to the data.
+ * </p>
+ *
+ * <p>
+ * Note that if the line is empty or begins with '#' it will be ignored as a comment.
+ * </p>
+ *
+ * @param line
+ * line from input data file
+ * @param data
+ * all data read so far, as a mapping from user IDs to preferences
+ * @param fromPriorData an implementation detail -- if true, data will map IDs to
+ * {@link PreferenceArray} since the framework is attempting to read and update raw
+ * data that is already in memory. Otherwise it maps to {@link Collection}s of
+ * {@link Preference}s, since it's reading fresh data. Subclasses must be prepared
+ * to handle this wrinkle.
+ */
+ protected void processLine(String line,
+ FastByIDMap<?> data,
+ FastByIDMap<FastByIDMap<Long>> timestamps,
+ boolean fromPriorData) {
+
+ // Ignore empty lines and comments
+ if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
+ return;
+ }
+
+ Iterator<String> tokens = delimiterPattern.split(line).iterator();
+ String userIDString = tokens.next();
+ String itemIDString = tokens.next();
+ String preferenceValueString = tokens.next();
+ boolean hasTimestamp = tokens.hasNext();
+ String timestampString = hasTimestamp ? tokens.next() : null;
+
+ long userID = readUserIDFromString(userIDString);
+ long itemID = readItemIDFromString(itemIDString);
+
+ if (transpose) {
+ long tmp = userID;
+ userID = itemID;
+ itemID = tmp;
+ }
+
+ // This is kind of gross but need to handle two types of storage
+ Object maybePrefs = data.get(userID);
+ if (fromPriorData) {
+ // Data are PreferenceArray
+
+ PreferenceArray prefs = (PreferenceArray) maybePrefs;
+ if (!hasTimestamp && preferenceValueString.isEmpty()) {
+ // Then line is of form "userID,itemID,", meaning remove
+ if (prefs != null) {
+ boolean exists = false;
+ int length = prefs.length();
+ for (int i = 0; i < length; i++) {
+ if (prefs.getItemID(i) == itemID) {
+ exists = true;
+ break;
+ }
+ }
+ if (exists) {
+ if (length == 1) {
+ data.remove(userID);
+ } else {
+ PreferenceArray newPrefs = new GenericUserPreferenceArray(length - 1);
+ for (int i = 0, j = 0; i < length; i++, j++) {
+ if (prefs.getItemID(i) == itemID) {
+ j--;
+ } else {
+ newPrefs.set(j, prefs.get(i));
+ }
+ }
+ ((FastByIDMap<PreferenceArray>) data).put(userID, newPrefs);
+ }
+ }
+ }
+
+ removeTimestamp(userID, itemID, timestamps);
+
+ } else {
+
+ float preferenceValue = Float.parseFloat(preferenceValueString);
+
+ boolean exists = false;
+ if (prefs != null) {
+ for (int i = 0; i < prefs.length(); i++) {
+ if (prefs.getItemID(i) == itemID) {
+ exists = true;
+ prefs.setValue(i, preferenceValue);
+ break;
+ }
+ }
+ }
+
+ if (!exists) {
+ if (prefs == null) {
+ prefs = new GenericUserPreferenceArray(1);
+ } else {
+ PreferenceArray newPrefs = new GenericUserPreferenceArray(prefs.length() + 1);
+ for (int i = 0, j = 1; i < prefs.length(); i++, j++) {
+ newPrefs.set(j, prefs.get(i));
+ }
+ prefs = newPrefs;
+ }
+ prefs.setUserID(0, userID);
+ prefs.setItemID(0, itemID);
+ prefs.setValue(0, preferenceValue);
+ ((FastByIDMap<PreferenceArray>) data).put(userID, prefs);
+ }
+ }
+
+ addTimestamp(userID, itemID, timestampString, timestamps);
+
+ } else {
+ // Data are Collection<Preference>
+
+ Collection<Preference> prefs = (Collection<Preference>) maybePrefs;
+
+ if (!hasTimestamp && preferenceValueString.isEmpty()) {
+ // Then line is of form "userID,itemID,", meaning remove
+ if (prefs != null) {
+ // remove pref
+ Iterator<Preference> prefsIterator = prefs.iterator();
+ while (prefsIterator.hasNext()) {
+ Preference pref = prefsIterator.next();
+ if (pref.getItemID() == itemID) {
+ prefsIterator.remove();
+ break;
+ }
+ }
+ }
+
+ removeTimestamp(userID, itemID, timestamps);
+
+ } else {
+
+ float preferenceValue = Float.parseFloat(preferenceValueString);
+
+ boolean exists = false;
+ if (prefs != null) {
+ for (Preference pref : prefs) {
+ if (pref.getItemID() == itemID) {
+ exists = true;
+ pref.setValue(preferenceValue);
+ break;
+ }
+ }
+ }
+
+ if (!exists) {
+ if (prefs == null) {
+ prefs = new ArrayList<>(2);
+ ((FastByIDMap<Collection<Preference>>) data).put(userID, prefs);
+ }
+ prefs.add(new GenericPreference(userID, itemID, preferenceValue));
+ }
+
+ addTimestamp(userID, itemID, timestampString, timestamps);
+
+ }
+
+ }
+ }
+
+ /**
+  * Reads every line from the given iterator and hands each non-empty one to
+  * {@link #processLineWithoutID(String, FastByIDMap, FastByIDMap)}, logging progress
+  * after every 100000 processed lines and the total at the end.
+  *
+  * @param dataOrUpdateFileIterator source of lines to consume
+  * @param data per-user item ID sets, updated in place
+  * @param timestamps per-user, per-item timestamps, updated in place
+  */
+ protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
+                                     FastByIDMap<FastIDSet> data,
+                                     FastByIDMap<FastByIDMap<Long>> timestamps) {
+   log.info("Reading file info...");
+   int linesProcessed = 0;
+   while (dataOrUpdateFileIterator.hasNext()) {
+     String currentLine = dataOrUpdateFileIterator.next();
+     if (currentLine.isEmpty()) {
+       // Blank lines are neither processed nor counted.
+       continue;
+     }
+     processLineWithoutID(currentLine, data, timestamps);
+     linesProcessed++;
+     if (linesProcessed % 100000 == 0) {
+       log.info("Processed {} lines", linesProcessed);
+     }
+   }
+   log.info("Read lines: {}", linesProcessed);
+ }
+
+ /**
+  * Applies one input line to the boolean-preference data. Empty lines and lines whose first
+  * character is {@code COMMENT_CHAR} are ignored. A line holding two IDs followed by an empty
+  * preference field and no timestamp removes the item from the user's set; any other line adds it.
+  *
+  * @param line raw line, split with {@code delimiterPattern}
+  * @param data per-user item ID sets, updated in place
+  * @param timestamps per-user, per-item timestamps, updated in place
+  */
+ protected void processLineWithoutID(String line,
+                                     FastByIDMap<FastIDSet> data,
+                                     FastByIDMap<FastByIDMap<Long>> timestamps) {
+
+   boolean skippable = line.isEmpty() || line.charAt(0) == COMMENT_CHAR;
+   if (skippable) {
+     return;
+   }
+
+   Iterator<String> fields = delimiterPattern.split(line).iterator();
+   String rawUserID = fields.next();
+   String rawItemID = fields.next();
+
+   // The third and fourth fields are optional.
+   boolean hasPreference = fields.hasNext();
+   String rawPreference = "";
+   if (hasPreference) {
+     rawPreference = fields.next();
+   }
+   boolean hasTimestamp = fields.hasNext();
+   String rawTimestamp = null;
+   if (hasTimestamp) {
+     rawTimestamp = fields.next();
+   }
+
+   long firstID = readUserIDFromString(rawUserID);
+   long secondID = readItemIDFromString(rawItemID);
+
+   // When transposing, the two columns swap roles.
+   long userID = transpose ? secondID : firstID;
+   long itemID = transpose ? firstID : secondID;
+
+   FastIDSet itemIDs = data.get(userID);
+
+   // Line of form "userID,itemID," means remove.
+   boolean isRemoval = hasPreference && !hasTimestamp && rawPreference.isEmpty();
+   if (isRemoval) {
+     if (itemIDs != null) {
+       itemIDs.remove(itemID);
+     }
+     removeTimestamp(userID, itemID, timestamps);
+     return;
+   }
+
+   if (itemIDs == null) {
+     itemIDs = new FastIDSet(2);
+     data.put(userID, itemIDs);
+   }
+   itemIDs.add(itemID);
+
+   addTimestamp(userID, itemID, rawTimestamp, timestamps);
+ }
+
+ /**
+  * Records a timestamp for the given user/item pair. Does nothing when {@code timestampString}
+  * is {@code null}; otherwise the string is parsed with {@link #readTimestampFromString(String)}.
+  */
+ private void addTimestamp(long userID,
+                           long itemID,
+                           String timestampString,
+                           FastByIDMap<FastByIDMap<Long>> timestamps) {
+   if (timestampString == null) {
+     return;
+   }
+   FastByIDMap<Long> perItem = timestamps.get(userID);
+   if (perItem == null) {
+     // First timestamp seen for this user: create the inner map before parsing.
+     perItem = new FastByIDMap<>();
+     timestamps.put(userID, perItem);
+   }
+   perItem.put(itemID, readTimestampFromString(timestampString));
+ }
+
+ /** Drops the timestamp stored for the given user/item pair, if the user has any timestamps at all. */
+ private static void removeTimestamp(long userID,
+                                     long itemID,
+                                     FastByIDMap<FastByIDMap<Long>> timestamps) {
+   FastByIDMap<Long> perItem = timestamps.get(userID);
+   if (perItem == null) {
+     return;
+   }
+   perItem.remove(itemID);
+ }
+
+ /**
+  * Converts the user ID field of an input line to a {@code long}. The default implementation parses
+  * it as a decimal number. Subclasses whose files hold non-numeric IDs can override this, for example
+  * to translate through an {@link org.apache.mahout.cf.taste.model.IDMigrator}.
+  */
+ protected long readUserIDFromString(String value) {
+   long userID = Long.parseLong(value);
+   return userID;
+ }
+
+ /**
+  * Converts the item ID field of an input line to a {@code long}. The default implementation parses
+  * it as a decimal number. Subclasses whose files hold non-numeric IDs can override this, for example
+  * to translate through an {@link org.apache.mahout.cf.taste.model.IDMigrator}.
+  */
+ protected long readItemIDFromString(String value) {
+   long itemID = Long.parseLong(value);
+   return itemID;
+ }
+
+ /**
+  * Converts the timestamp field of an input line to a {@code long}. The default implementation
+  * expects a number, interpreted as milliseconds since the epoch. Subclasses may override this to
+  * accept other time formats.
+  */
+ protected long readTimestampFromString(String value) {
+   long timestamp = Long.parseLong(value);
+   return timestamp;
+ }
+
+ @Override
+ public LongPrimitiveIterator getUserIDs() throws TasteException {
+ return delegate.getUserIDs();
+ }
+
+ @Override
+ public PreferenceArray getPreferencesFromUser(long userID) throws TasteException {
+ return delegate.getPreferencesFromUser(userID);
+ }
+
+ @Override
+ public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+ return delegate.getItemIDsFromUser(userID);
+ }
+
+ @Override
+ public LongPrimitiveIterator getItemIDs() throws TasteException {
+ return delegate.getItemIDs();
+ }
+
+ @Override
+ public PreferenceArray getPreferencesForItem(long itemID) throws TasteException {
+ return delegate.getPreferencesForItem(itemID);
+ }
+
+ @Override
+ public Float getPreferenceValue(long userID, long itemID) throws TasteException {
+ return delegate.getPreferenceValue(userID, itemID);
+ }
+
+ @Override
+ public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+ return delegate.getPreferenceTime(userID, itemID);
+ }
+
+ @Override
+ public int getNumItems() throws TasteException {
+ return delegate.getNumItems();
+ }
+
+ @Override
+ public int getNumUsers() throws TasteException {
+ return delegate.getNumUsers();
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID) throws TasteException {
+ return delegate.getNumUsersWithPreferenceFor(itemID);
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException {
+ return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2);
+ }
+
+ /**
+  * Sets a preference on the in-memory data held by this {@link FileDataModel} by forwarding to
+  * {@code delegate}. Nothing is written to disk, so the change is temporary and disappears the next
+  * time data is reloaded from a file. This method should also be considered relatively slow.
+  */
+ @Override
+ public void setPreference(long userID, long itemID, float value) throws TasteException {
+   delegate.setPreference(userID, itemID, value);
+ }
+
+ /**
+  * Removes a preference by forwarding to {@code delegate}. The caveats described at
+  * {@link #setPreference(long, long, float)} apply here as well.
+  */
+ @Override
+ public void removePreference(long userID, long itemID) throws TasteException {
+   delegate.removePreference(userID, itemID);
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ if (dataFile.lastModified() > lastModified + minReloadIntervalMS
+ || readLastUpdateFileModified() > lastUpdateFileModified + minReloadIntervalMS) {
+ log.debug("File has changed; reloading...");
+ reload();
+ }
+ }
+
+ @Override
+ public boolean hasPreferenceValues() {
+ return delegate.hasPreferenceValues();
+ }
+
+ @Override
+ public float getMaxPreference() {
+ return delegate.getMaxPreference();
+ }
+
+ @Override
+ public float getMinPreference() {
+ return delegate.getMinPreference();
+ }
+
+ @Override
+ public String toString() {
+ return "FileDataModel[dataFile:" + dataFile + ']';
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
new file mode 100644
index 0000000..1bcb4ef
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.model.AbstractIDMigrator;
+import org.apache.mahout.common.iterator.FileLineIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An {@link org.apache.mahout.cf.taste.model.IDMigrator} backed by a file.
+ * This class typically expects a file where each line
+ * contains a single stringID to be stored in this migrator.
+ * </p>
+ *
+ * <p>
+ * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file
+ * has been reloaded very recently already.
+ * </p>
+ */
+public class FileIDMigrator extends AbstractIDMigrator {
+
+   /** Default value for the minimum reload interval, in milliseconds. */
+   public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // one minute
+
+   private final File dataFile;
+   /** Current mapping from long ID back to the string it was derived from; replaced on reload. */
+   private FastByIDMap<String> longToString;
+   private final ReentrantLock reloadLock;
+
+   /** Modification time of {@link #dataFile} recorded when the mapping was last built. */
+   private long lastModified;
+   private final long minReloadIntervalMS;
+
+   private static final Logger log = LoggerFactory.getLogger(FileIDMigrator.class);
+
+   /**
+    * Creates a migrator for the given file using {@link #DEFAULT_MIN_RELOAD_INTERVAL_MS}.
+    *
+    * @param dataFile file whose lines are the string IDs
+    * @throws FileNotFoundException if the file does not exist or is a directory
+    */
+   public FileIDMigrator(File dataFile) throws FileNotFoundException {
+     this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+   }
+
+   /**
+    * Creates a migrator for the given file and immediately loads its contents.
+    *
+    * @param dataFile file whose lines are the string IDs; must not be {@code null}
+    * @param minReloadIntervalMS how much newer, in milliseconds, the file's modification time must
+    *        be than the last recorded one before {@link #refresh(Collection)} reloads it
+    * @throws FileNotFoundException if the file does not exist or is a directory
+    */
+   public FileIDMigrator(File dataFile, long minReloadIntervalMS) throws FileNotFoundException {
+     longToString = new FastByIDMap<>(100);
+     this.dataFile = Preconditions.checkNotNull(dataFile);
+     if (!dataFile.exists() || dataFile.isDirectory()) {
+       throw new FileNotFoundException(dataFile.toString());
+     }
+
+     // Log the real class name so the message can be traced back to this class.
+     log.info("Creating FileIDMigrator for file {}", dataFile);
+
+     this.reloadLock = new ReentrantLock();
+     this.lastModified = dataFile.lastModified();
+     this.minReloadIntervalMS = minReloadIntervalMS;
+
+     reload();
+   }
+
+   /** Returns the string stored for {@code longID} in the current mapping. */
+   @Override
+   public String toStringID(long longID) {
+     return longToString.get(longID);
+   }
+
+   /**
+    * Rebuilds the mapping from the data file. If the reload lock is already held, the call returns
+    * without doing anything. An {@link IOException} while reading is rethrown as an
+    * {@link IllegalStateException}.
+    */
+   private void reload() {
+     if (reloadLock.tryLock()) {
+       try {
+         longToString = buildMapping();
+       } catch (IOException ioe) {
+         throw new IllegalStateException(ioe);
+       } finally {
+         reloadLock.unlock();
+       }
+     }
+   }
+
+   /**
+    * Reads the data file line by line, mapping {@code toLongID(line)} to the line itself, and then
+    * records the file's modification time.
+    */
+   private FastByIDMap<String> buildMapping() throws IOException {
+     FastByIDMap<String> mapping = new FastByIDMap<>();
+     for (String line : new FileLineIterable(dataFile)) {
+       mapping.put(toLongID(line), line);
+     }
+     lastModified = dataFile.lastModified();
+     return mapping;
+   }
+
+   /**
+    * Reloads the mapping when the file's modification time is more than the minimum reload interval
+    * later than the one last recorded.
+    */
+   @Override
+   public void refresh(Collection<Refreshable> alreadyRefreshed) {
+     if (dataFile.lastModified() > lastModified + minReloadIntervalMS) {
+       log.debug("File has changed; reloading...");
+       reload();
+     }
+   }
+
+   @Override
+   public String toString() {
+     return "FileIDMigrator[dataFile:" + dataFile + ']';
+   }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
new file mode 100644
index 0000000..8d33f60
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Contains methods and resources useful to all classes in this package.
+ * </p>
+ */
+abstract class AbstractUserNeighborhood implements UserNeighborhood {
+
+ private final UserSimilarity userSimilarity;
+ private final DataModel dataModel;
+ private final double samplingRate;
+ private final RefreshHelper refreshHelper;
+
+ AbstractUserNeighborhood(UserSimilarity userSimilarity, DataModel dataModel, double samplingRate) {
+ Preconditions.checkArgument(userSimilarity != null, "userSimilarity is null");
+ Preconditions.checkArgument(dataModel != null, "dataModel is null");
+ Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0, "samplingRate must be in (0,1]");
+ this.userSimilarity = userSimilarity;
+ this.dataModel = dataModel;
+ this.samplingRate = samplingRate;
+ this.refreshHelper = new RefreshHelper(null);
+ this.refreshHelper.addDependency(this.dataModel);
+ this.refreshHelper.addDependency(this.userSimilarity);
+ }
+
+ final UserSimilarity getUserSimilarity() {
+ return userSimilarity;
+ }
+
+ final DataModel getDataModel() {
+ return dataModel;
+ }
+
+ final double getSamplingRate() {
+ return samplingRate;
+ }
+
+ @Override
+ public final void refresh(Collection<Refreshable> alreadyRefreshed) {
+ refreshHelper.refresh(alreadyRefreshed);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
new file mode 100644
index 0000000..998e476
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.Retriever;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+
+import com.google.common.base.Preconditions;
+
+/** A caching wrapper around an underlying {@link UserNeighborhood} implementation. */
+public final class CachingUserNeighborhood implements UserNeighborhood {
+
+   private final UserNeighborhood neighborhood;
+   private final Cache<Long,long[]> neighborhoodCache;
+
+   /**
+    * Wraps the given neighborhood with a cache sized to the number of users in the data model.
+    *
+    * @param neighborhood implementation whose results are cached
+    * @param dataModel model consulted only for its user count
+    * @throws IllegalArgumentException if {@code neighborhood} or {@code dataModel} is {@code null}
+    * @throws TasteException if the user count cannot be read from the data model
+    */
+   public CachingUserNeighborhood(UserNeighborhood neighborhood, DataModel dataModel) throws TasteException {
+     Preconditions.checkArgument(neighborhood != null, "neighborhood is null");
+     // Fail with a descriptive message rather than a bare NullPointerException below.
+     Preconditions.checkArgument(dataModel != null, "dataModel is null");
+     this.neighborhood = neighborhood;
+     int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing
+     this.neighborhoodCache = new Cache<>(new NeighborhoodRetriever(neighborhood), maxCacheSize);
+   }
+
+   /** Returns the neighborhood for {@code userID} by looking it up through the cache. */
+   @Override
+   public long[] getUserNeighborhood(long userID) throws TasteException {
+     return neighborhoodCache.get(userID);
+   }
+
+   /** Clears the cache, then refreshes the wrapped neighborhood via {@link RefreshHelper}. */
+   @Override
+   public void refresh(Collection<Refreshable> alreadyRefreshed) {
+     neighborhoodCache.clear();
+     Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+     RefreshHelper.maybeRefresh(refreshed, neighborhood);
+   }
+
+   /** Computes a neighborhood by asking the wrapped implementation. */
+   private static final class NeighborhoodRetriever implements Retriever<Long,long[]> {
+     private final UserNeighborhood neighborhood;
+
+     private NeighborhoodRetriever(UserNeighborhood neighborhood) {
+       this.neighborhood = neighborhood;
+     }
+
+     @Override
+     public long[] get(Long key) throws TasteException {
+       return neighborhood.getUserNeighborhood(key);
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
new file mode 100644
index 0000000..7f3a98a
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.recommender.TopItems;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Computes a neighborhood consisting of the nearest n users to a given user. "Nearest" is defined by the
+ * given {@link UserSimilarity}.
+ * </p>
+ */
+public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
+
+   private final int n;
+   private final double minSimilarity;
+
+   /**
+    * Builds a neighborhood with no minimum similarity and a sampling rate of 1.0.
+    *
+    * @param n neighborhood size; capped at the number of users in the data model
+    * @throws IllegalArgumentException
+    *           if {@code n < 1}, or userSimilarity or dataModel are {@code null}
+    */
+   public NearestNUserNeighborhood(int n, UserSimilarity userSimilarity, DataModel dataModel) throws TasteException {
+     this(n, Double.NEGATIVE_INFINITY, userSimilarity, dataModel, 1.0);
+   }
+
+   /**
+    * Builds a neighborhood with the given minimum similarity and a sampling rate of 1.0.
+    *
+    * @param n neighborhood size; capped at the number of users in the data model
+    * @param minSimilarity minimal similarity required for neighbors
+    * @throws IllegalArgumentException
+    *           if {@code n < 1}, or userSimilarity or dataModel are {@code null}
+    */
+   public NearestNUserNeighborhood(int n,
+                                   double minSimilarity,
+                                   UserSimilarity userSimilarity,
+                                   DataModel dataModel) throws TasteException {
+     this(n, minSimilarity, userSimilarity, dataModel, 1.0);
+   }
+
+   /**
+    * @param n neighborhood size; capped at the number of users in the data model
+    * @param minSimilarity minimal similarity required for neighbors
+    * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
+    *          trade quality for performance
+    * @throws IllegalArgumentException
+    *           if {@code n < 1} or samplingRate is NaN or not in (0,1], or userSimilarity or
+    *           dataModel are {@code null}
+    */
+   public NearestNUserNeighborhood(int n,
+                                   double minSimilarity,
+                                   UserSimilarity userSimilarity,
+                                   DataModel dataModel,
+                                   double samplingRate) throws TasteException {
+     super(userSimilarity, dataModel, samplingRate);
+     Preconditions.checkArgument(n >= 1, "n must be at least 1");
+     // Never ask for more neighbors than there are users.
+     this.n = Math.min(n, dataModel.getNumUsers());
+     this.minSimilarity = minSimilarity;
+   }
+
+   /**
+    * Returns the top {@code n} users, drawn from the (possibly sampled) user IDs of the data model,
+    * ranked by the similarity estimator below.
+    */
+   @Override
+   public long[] getUserNeighborhood(long userID) throws TasteException {
+     DataModel model = getDataModel();
+     TopItems.Estimator<Long> similarityEstimator = new Estimator(getUserSimilarity(), userID, minSimilarity);
+     LongPrimitiveIterator candidates =
+         SamplingLongPrimitiveIterator.maybeWrapIterator(model.getUserIDs(), getSamplingRate());
+     return TopItems.getTopUsers(n, candidates, null, similarityEstimator);
+   }
+
+   @Override
+   public String toString() {
+     return "NearestNUserNeighborhood";
+   }
+
+   /**
+    * Scores a candidate by its similarity to a fixed user. Yields NaN for the fixed user itself and
+    * for any candidate whose similarity is not at least the minimum.
+    */
+   private static final class Estimator implements TopItems.Estimator<Long> {
+     private final UserSimilarity userSimilarityImpl;
+     private final long theUserID;
+     private final double minSim;
+
+     private Estimator(UserSimilarity userSimilarityImpl, long theUserID, double minSim) {
+       this.userSimilarityImpl = userSimilarityImpl;
+       this.theUserID = theUserID;
+       this.minSim = minSim;
+     }
+
+     @Override
+     public double estimate(Long userID) throws TasteException {
+       long candidateID = userID;
+       if (candidateID == theUserID) {
+         return Double.NaN;
+       }
+       double similarity = userSimilarityImpl.userSimilarity(theUserID, candidateID);
+       if (similarity >= minSim) {
+         return similarity;
+       }
+       return Double.NaN;
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
new file mode 100644
index 0000000..d5246e4
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Computes a neighborhood consisting of all users whose similarity to the given user meets or exceeds a
+ * certain threshold. Similarity is defined by the given {@link UserSimilarity}.
+ * </p>
+ */
+public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
+
+   private final double threshold;
+
+   /**
+    * Builds a neighborhood that considers every user (sampling rate 1.0).
+    *
+    * @param threshold
+    *          similarity threshold
+    * @param userSimilarity
+    *          similarity metric
+    * @param dataModel
+    *          data model
+    * @throws IllegalArgumentException
+    *           if threshold is {@link Double#NaN}, or if userSimilarity or dataModel are {@code null}
+    */
+   public ThresholdUserNeighborhood(double threshold, UserSimilarity userSimilarity, DataModel dataModel) {
+     this(threshold, userSimilarity, dataModel, 1.0);
+   }
+
+   /**
+    * @param threshold
+    *          similarity threshold
+    * @param userSimilarity
+    *          similarity metric
+    * @param dataModel
+    *          data model
+    * @param samplingRate
+    *          percentage of users to consider when building neighborhood -- decrease to trade quality
+    *          for performance
+    * @throws IllegalArgumentException
+    *           if threshold or samplingRate is {@link Double#NaN}, or if samplingRate is not positive
+    *           and less than or equal to 1.0, or if userSimilarity or dataModel are {@code null}
+    */
+   public ThresholdUserNeighborhood(double threshold,
+                                    UserSimilarity userSimilarity,
+                                    DataModel dataModel,
+                                    double samplingRate) {
+     super(userSimilarity, dataModel, samplingRate);
+     boolean thresholdIsNumber = !Double.isNaN(threshold);
+     Preconditions.checkArgument(thresholdIsNumber, "threshold must not be NaN");
+     this.threshold = threshold;
+   }
+
+   /**
+    * Collects every other user, drawn from the (possibly sampled) user IDs of the data model, whose
+    * similarity to {@code userID} is a number at or above the threshold.
+    */
+   @Override
+   public long[] getUserNeighborhood(long userID) throws TasteException {
+
+     DataModel model = getDataModel();
+     FastIDSet neighbors = new FastIDSet();
+     LongPrimitiveIterator candidates =
+         SamplingLongPrimitiveIterator.maybeWrapIterator(model.getUserIDs(), getSamplingRate());
+     UserSimilarity similarityMetric = getUserSimilarity();
+
+     while (candidates.hasNext()) {
+       long candidateID = candidates.next();
+       if (candidateID == userID) {
+         // A user is never a member of their own neighborhood.
+         continue;
+       }
+       double similarity = similarityMetric.userSimilarity(userID, candidateID);
+       if (Double.isNaN(similarity) || similarity < threshold) {
+         continue;
+       }
+       neighbors.add(candidateID);
+     }
+
+     return neighbors.toArray();
+   }
+
+   @Override
+   public String toString() {
+     return "ThresholdUserNeighborhood";
+   }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
new file mode 100644
index 0000000..d24ea6a
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
+
+import java.util.Collection;
+
+/**
+ * Abstract base implementation for retrieving candidate items to recommend
+ */
+public abstract class AbstractCandidateItemsStrategy implements CandidateItemsStrategy,
+ MostSimilarItemsCandidateItemsStrategy {
+
+ protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException{
+ return doGetCandidateItems(preferredItemIDs, dataModel, false);
+ }
+
+ @Override
+ public FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel,
+ boolean includeKnownItems) throws TasteException {
+ return doGetCandidateItems(preferencesFromUser.getIDs(), dataModel, includeKnownItems);
+ }
+
+ @Override
+ public FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel)
+ throws TasteException {
+ return doGetCandidateItems(itemIDs, dataModel, false);
+ }
+
+ protected abstract FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel,
+ boolean includeKnownItems) throws TasteException;
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {}
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
new file mode 100644
index 0000000..3a62b08
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base class for {@link Recommender} implementations. It stores the {@link DataModel} and the
+ * {@link CandidateItemsStrategy}, routes the shorter {@code recommend} overloads to the four-argument
+ * one, and forwards preference updates to the data model.
+ */
+public abstract class AbstractRecommender implements Recommender {
+
+  private static final Logger log = LoggerFactory.getLogger(AbstractRecommender.class);
+
+  private final DataModel dataModel;
+  private final CandidateItemsStrategy candidateItemsStrategy;
+
+  /**
+   * @param dataModel data model to operate on; must not be {@code null}
+   * @param candidateItemsStrategy strategy used by {@link #getAllOtherItems}; must not be {@code null}
+   */
+  protected AbstractRecommender(DataModel dataModel, CandidateItemsStrategy candidateItemsStrategy) {
+    this.dataModel = Preconditions.checkNotNull(dataModel);
+    this.candidateItemsStrategy = Preconditions.checkNotNull(candidateItemsStrategy);
+  }
+
+  /** Uses the strategy returned by {@link #getDefaultCandidateItemsStrategy()}. */
+  protected AbstractRecommender(DataModel dataModel) {
+    this(dataModel, getDefaultCandidateItemsStrategy());
+  }
+
+  /** @return a new {@link PreferredItemsNeighborhoodCandidateItemsStrategy} */
+  protected static CandidateItemsStrategy getDefaultCandidateItemsStrategy() {
+    return new PreferredItemsNeighborhoodCandidateItemsStrategy();
+  }
+
+  /**
+   * <p>
+   * Delegates to {@link Recommender#recommend(long, int, IDRescorer, boolean)} with a {@code null}
+   * rescorer and {@code includeKnownItems} set to {@code false}.
+   * </p>
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany) throws TasteException {
+    IDRescorer noRescorer = null;
+    return recommend(userID, howMany, noRescorer, false);
+  }
+
+  /**
+   * <p>
+   * Delegates to {@link Recommender#recommend(long, int, IDRescorer, boolean)} with a {@code null}
+   * rescorer, passing {@code includeKnownItems} through unchanged.
+   * </p>
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException {
+    IDRescorer noRescorer = null;
+    return recommend(userID, howMany, noRescorer, includeKnownItems);
+  }
+
+  /**
+   * <p>
+   * Delegates to {@link Recommender#recommend(long, int, IDRescorer, boolean)} with
+   * {@code includeKnownItems} set to {@code false}.
+   * </p>
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException {
+    final boolean includeKnownItems = false;
+    return recommend(userID, howMany, rescorer, includeKnownItems);
+  }
+
+  /**
+   * <p>
+   * Rejects NaN values, logs at debug level, then calls
+   * {@link DataModel#setPreference(long, long, float)}.
+   * </p>
+   *
+   * @throws IllegalArgumentException
+   *           if value is {@link Float#NaN}
+   */
+  @Override
+  public void setPreference(long userID, long itemID, float value) throws TasteException {
+    if (Float.isNaN(value)) {
+      throw new IllegalArgumentException("NaN value");
+    }
+    log.debug("Setting preference for user {}, item {}", userID, itemID);
+    dataModel.setPreference(userID, itemID, value);
+  }
+
+  /**
+   * <p>
+   * Logs at debug level, then calls {@link DataModel#removePreference(long, long)}.
+   * </p>
+   */
+  @Override
+  public void removePreference(long userID, long itemID) throws TasteException {
+    log.debug("Remove preference for user '{}', item '{}'", userID, itemID);
+    dataModel.removePreference(userID, itemID);
+  }
+
+  /** @return the data model supplied at construction time */
+  @Override
+  public DataModel getDataModel() {
+    return dataModel;
+  }
+
+  /**
+   * Asks the configured {@link CandidateItemsStrategy} for candidate items, supplying this
+   * recommender's data model.
+   *
+   * @param userID
+   *          ID of user being evaluated
+   * @param preferencesFromUser
+   *          the preferences from the user
+   * @param includeKnownItems
+   *          passed through to the strategy; intended to control whether items already known by the
+   *          user may appear in the result
+   * @return the candidate item IDs produced by the strategy
+   * @throws TasteException
+   *           if the strategy fails while collecting candidates
+   */
+  protected FastIDSet getAllOtherItems(long userID, PreferenceArray preferencesFromUser, boolean includeKnownItems)
+    throws TasteException {
+    FastIDSet candidates =
+        candidateItemsStrategy.getCandidateItems(userID, preferencesFromUser, dataModel, includeKnownItems);
+    return candidates;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..37389a7
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import com.google.common.base.Preconditions;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+
+/**
+ * returns the result of {@link ItemSimilarity#allSimilarItemIDs(long)} as candidate items
+ */
+public class AllSimilarItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+ private final ItemSimilarity similarity;
+
+ public AllSimilarItemsCandidateItemsStrategy(ItemSimilarity similarity) {
+ Preconditions.checkArgument(similarity != null, "similarity is null");
+ this.similarity = similarity;
+ }
+
+ @Override
+ protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+ throws TasteException {
+ FastIDSet candidateItemIDs = new FastIDSet();
+ for (long itemID : preferredItemIDs) {
+ candidateItemIDs.addAll(similarity.allSimilarItemIDs(itemID));
+ }
+ if (!includeKnownItems) {
+ candidateItemIDs.removeAll(preferredItemIDs);
+ }
+ return candidateItemIDs;
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..929eddd
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Candidate strategy that starts from every item ID in the {@link DataModel}.
+ */
+public final class AllUnknownItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+  /**
+   * Copies all item IDs of the data model into a new set. Unless {@code includeKnownItems} is set,
+   * the preferred items are removed, leaving only items not in {@code preferredItemIDs}.
+   */
+  @Override
+  protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+    throws TasteException {
+    // Pre-size the set with the model's reported item count.
+    FastIDSet candidates = new FastIDSet(dataModel.getNumItems());
+    for (LongPrimitiveIterator it = dataModel.getItemIDs(); it.hasNext();) {
+      candidates.add(it.nextLong());
+    }
+    if (includeKnownItems) {
+      return candidates;
+    }
+    candidates.removeAll(preferredItemIDs);
+    return candidates;
+  }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
new file mode 100644
index 0000000..1677ea8
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+/**
+ * <p>
+ * Defines ordering on {@link RecommendedItem} by the rescored value of the recommendations' estimated
+ * preference value, from high to low. When no rescorer is configured, the raw values are compared.
+ * Items whose (rescored) value is NaN are ordered after all other items and compare equal to each
+ * other, so that the ordering stays consistent.
+ * </p>
+ */
+final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
+
+  private final IDRescorer rescorer;
+
+  /**
+   * @param rescorer rescorer applied to each item's value before comparing; may be {@code null}, in
+   *          which case values are compared as-is
+   */
+  ByRescoreComparator(IDRescorer rescorer) {
+    this.rescorer = rescorer;
+  }
+
+  /**
+   * Compares two items by descending (rescored) value.
+   *
+   * @return a negative number if {@code o1} ranks before {@code o2}, a positive number if it ranks
+   *         after, and 0 if both values are equal or both are NaN
+   */
+  @Override
+  public int compare(RecommendedItem o1, RecommendedItem o2) {
+    double rescored1;
+    double rescored2;
+    if (rescorer == null) {
+      rescored1 = o1.getValue();
+      rescored2 = o2.getValue();
+    } else {
+      rescored1 = rescorer.rescore(o1.getItemID(), o1.getValue());
+      rescored2 = rescorer.rescore(o2.getItemID(), o2.getValue());
+    }
+    // '<' and '>' are both false when an operand is NaN, which would make NaN "equal" to every
+    // value and break transitivity. Give NaN a fixed position (last) instead.
+    boolean nan1 = Double.isNaN(rescored1);
+    boolean nan2 = Double.isNaN(rescored2);
+    if (nan1 || nan2) {
+      if (nan1 && nan2) {
+        return 0;
+      }
+      return nan1 ? 1 : -1;
+    }
+    if (rescored1 < rescored2) {
+      return 1;
+    } else if (rescored1 > rescored2) {
+      return -1;
+    } else {
+      return 0;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "ByRescoreComparator[rescorer:" + rescorer + ']';
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
new file mode 100644
index 0000000..57c5f3d
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+/**
+ * Defines an ordering from most-preferred item (highest value) to least-preferred. Items whose value
+ * is NaN are ordered after all other items and compare equal to each other, so that the ordering
+ * stays consistent.
+ */
+public final class ByValueRecommendedItemComparator implements Comparator<RecommendedItem>, Serializable {
+
+  private static final Comparator<RecommendedItem> INSTANCE = new ByValueRecommendedItemComparator();
+
+  /** @return the shared comparator instance */
+  public static Comparator<RecommendedItem> getInstance() {
+    return INSTANCE;
+  }
+
+  /**
+   * Compares two items by descending value.
+   *
+   * @return a negative number if {@code o1} ranks before {@code o2}, a positive number if it ranks
+   *         after, and 0 if both values are equal or both are NaN
+   */
+  @Override
+  public int compare(RecommendedItem o1, RecommendedItem o2) {
+    float value1 = o1.getValue();
+    float value2 = o2.getValue();
+    // '<' and '>' are both false when an operand is NaN, which would make NaN "equal" to every
+    // value and break transitivity. Give NaN a fixed position (last) instead.
+    boolean nan1 = Float.isNaN(value1);
+    boolean nan2 = Float.isNaN(value2);
+    if (nan1 || nan2) {
+      if (nan1 && nan2) {
+        return 0;
+      }
+      return nan1 ? 1 : -1;
+    }
+    return value1 > value2 ? -1 : value1 < value2 ? 1 : 0;
+  }
+
+}