You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/04 14:29:46 UTC

[44/53] [abbrv] [partial] mahout git commit: end of day 6-2-2018

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
new file mode 100644
index 0000000..546349b
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserDataModel.java
@@ -0,0 +1,320 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * This {@link DataModel} decorator class is useful in a situation where you wish to recommend to a user that
+ * doesn't really exist yet in your actual {@link DataModel}. For example maybe you wish to recommend DVDs to
+ * a user who has browsed a few titles on your DVD store site, but, the user is not yet registered.
+ * </p>
+ *
+ * <p>
+ * This enables you to temporarily add a temporary user to an existing {@link DataModel} in a way that
+ * recommenders can then produce recommendations anyway. To do so, wrap your real implementation in this
+ * class:
+ * </p>
+ *
+ * <p>
+ *
+ * <pre>
+ * DataModel realModel = ...;
+ * DataModel plusModel = new PlusAnonymousUserDataModel(realModel);
+ * ...
+ * ItemSimilarity similarity = new LogLikelihoodSimilarity(realModel); // not plusModel
+ * </pre>
+ *
+ * </p>
+ *
+ * <p>
+ * But, you may continue to use {@code realModel} as input to other components. To recommend, first construct and
+ * set the temporary user information on the model and then simply call the recommender. The
+ * {@code synchronized} block exists to remind you that this is of course not thread-safe. Only one set
+ * of temp data can be inserted into the model and used at one time.
+ * </p>
+ *
+ * <p>
+ *
+ * <pre>
+ * Recommender recommender = ...;
+ * ...
+ * synchronized(...) {
+ *   PreferenceArray tempPrefs = ...;
+ *   plusModel.setTempPrefs(tempPrefs);
+ *   recommender.recommend(PlusAnonymousUserDataModel.TEMP_USER_ID, 10);
+ *   plusModel.setTempPrefs(null);
+ * }
+ * </pre>
+ *
+ * </p>
+ */
+public class PlusAnonymousUserDataModel implements DataModel {
+
+  public static final long TEMP_USER_ID = Long.MIN_VALUE;
+  
+  private final DataModel delegate;
+  private PreferenceArray tempPrefs;
+  private final FastIDSet prefItemIDs;
+
+  private static final Logger log = LoggerFactory.getLogger(PlusAnonymousUserDataModel.class);
+
+  public PlusAnonymousUserDataModel(DataModel delegate) {
+    this.delegate = delegate;
+    this.prefItemIDs = new FastIDSet();
+  }
+
+  protected DataModel getDelegate() {
+    return delegate;
+  }
+  
+  public void setTempPrefs(PreferenceArray prefs) {
+    Preconditions.checkArgument(prefs != null && prefs.length() > 0, "prefs is null or empty");
+    this.tempPrefs = prefs;
+    this.prefItemIDs.clear();
+    for (int i = 0; i < prefs.length(); i++) {
+      this.prefItemIDs.add(prefs.getItemID(i));
+    }
+  }
+
+  public void clearTempPrefs() {
+    tempPrefs = null;
+    prefItemIDs.clear();
+  }
+  
+  @Override
+  public LongPrimitiveIterator getUserIDs() throws TasteException {
+    if (tempPrefs == null) {
+      return delegate.getUserIDs();
+    }
+    return new PlusAnonymousUserLongPrimitiveIterator(delegate.getUserIDs(), TEMP_USER_ID);
+  }
+  
+  @Override
+  public PreferenceArray getPreferencesFromUser(long userID) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      return tempPrefs;
+    }
+    return delegate.getPreferencesFromUser(userID);
+  }
+  
+  @Override
+  public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      return prefItemIDs;
+    }
+    return delegate.getItemIDsFromUser(userID);
+  }
+  
+  @Override
+  public LongPrimitiveIterator getItemIDs() throws TasteException {
+    return delegate.getItemIDs();
+    // Yeah ignoring items that only the plus-one user knows about... can't really happen
+  }
+  
+  @Override
+  public PreferenceArray getPreferencesForItem(long itemID) throws TasteException {
+    if (tempPrefs == null) {
+      return delegate.getPreferencesForItem(itemID);
+    }
+    PreferenceArray delegatePrefs = null;
+    try {
+      delegatePrefs = delegate.getPreferencesForItem(itemID);
+    } catch (NoSuchItemException nsie) {
+      // OK. Probably an item that only the anonymous user has
+      if (log.isDebugEnabled()) {
+        log.debug("Item {} unknown", itemID);
+      }
+    }
+    for (int i = 0; i < tempPrefs.length(); i++) {
+      if (tempPrefs.getItemID(i) == itemID) {
+        return cloneAndMergeInto(delegatePrefs, itemID, tempPrefs.getUserID(i), tempPrefs.getValue(i));
+      }
+    }
+    if (delegatePrefs == null) {
+      // No, didn't find it among the anonymous user prefs
+      throw new NoSuchItemException(itemID);
+    }
+    return delegatePrefs;
+  }
+
+  private static PreferenceArray cloneAndMergeInto(PreferenceArray delegatePrefs,
+                                                   long itemID,
+                                                   long newUserID,
+                                                   float value) {
+
+    int length = delegatePrefs == null ? 0 : delegatePrefs.length();
+    int newLength = length + 1;
+    PreferenceArray newPreferenceArray = new GenericItemPreferenceArray(newLength);
+
+    // Set item ID once
+    newPreferenceArray.setItemID(0, itemID);
+
+    int positionToInsert = 0;
+    while (positionToInsert < length && newUserID > delegatePrefs.getUserID(positionToInsert)) {
+      positionToInsert++;
+    }
+
+    for (int i = 0; i < positionToInsert; i++) {
+      newPreferenceArray.setUserID(i, delegatePrefs.getUserID(i));
+      newPreferenceArray.setValue(i, delegatePrefs.getValue(i));
+    }
+    newPreferenceArray.setUserID(positionToInsert, newUserID);
+    newPreferenceArray.setValue(positionToInsert, value);
+    for (int i = positionToInsert + 1; i < newLength; i++) {
+      newPreferenceArray.setUserID(i, delegatePrefs.getUserID(i - 1));
+      newPreferenceArray.setValue(i, delegatePrefs.getValue(i - 1));
+    }
+
+    return newPreferenceArray;
+  }
+  
+  @Override
+  public Float getPreferenceValue(long userID, long itemID) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      for (int i = 0; i < tempPrefs.length(); i++) {
+        if (tempPrefs.getItemID(i) == itemID) {
+          return tempPrefs.getValue(i);
+        }
+      }
+      return null;
+    }
+    return delegate.getPreferenceValue(userID, itemID);
+  }
+
+  @Override
+  public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      return null;
+    }
+    return delegate.getPreferenceTime(userID, itemID);
+  }
+  
+  @Override
+  public int getNumItems() throws TasteException {
+    return delegate.getNumItems();
+  }
+  
+  @Override
+  public int getNumUsers() throws TasteException {
+    return delegate.getNumUsers() + (tempPrefs == null ? 0 : 1);
+  }
+  
+  @Override
+  public int getNumUsersWithPreferenceFor(long itemID) throws TasteException {
+    if (tempPrefs == null) {
+      return delegate.getNumUsersWithPreferenceFor(itemID);
+    }
+    boolean found = false;
+    for (int i = 0; i < tempPrefs.length(); i++) {
+      if (tempPrefs.getItemID(i) == itemID) {
+        found = true;
+        break;
+      }
+    }
+    return delegate.getNumUsersWithPreferenceFor(itemID) + (found ? 1 : 0);
+  }
+
+  @Override
+  public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException {
+    if (tempPrefs == null) {
+      return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2);
+    }
+    boolean found1 = false;
+    boolean found2 = false;
+    for (int i = 0; i < tempPrefs.length() && !(found1 && found2); i++) {
+      long itemID = tempPrefs.getItemID(i);
+      if (itemID == itemID1) {
+        found1 = true;
+      }
+      if (itemID == itemID2) {
+        found2 = true;
+      }
+    }
+    return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2) + (found1 && found2 ? 1 : 0);
+  }
+  
+  @Override
+  public void setPreference(long userID, long itemID, float value) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      throw new UnsupportedOperationException();
+    }
+    delegate.setPreference(userID, itemID, value);
+  }
+  
+  @Override
+  public void removePreference(long userID, long itemID) throws TasteException {
+    if (userID == TEMP_USER_ID) {
+      if (tempPrefs == null) {
+        throw new NoSuchUserException(TEMP_USER_ID);
+      }
+      throw new UnsupportedOperationException();
+    }
+    delegate.removePreference(userID, itemID);
+  }
+  
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    delegate.refresh(alreadyRefreshed);
+  }
+
+  @Override
+  public boolean hasPreferenceValues() {
+    return delegate.hasPreferenceValues();
+  }
+
+  @Override
+  public float getMaxPreference() {
+    return delegate.getMaxPreference();
+  }
+
+  @Override
+  public float getMinPreference() {
+    return delegate.getMinPreference();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
new file mode 100644
index 0000000..ea4df85
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.impl.common.AbstractLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+
+final class PlusAnonymousUserLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
+  
+  private final LongPrimitiveIterator delegate;
+  private final long extraDatum;
+  private boolean datumConsumed;
+  
+  PlusAnonymousUserLongPrimitiveIterator(LongPrimitiveIterator delegate, long extraDatum) {
+    this.delegate = delegate;
+    this.extraDatum = extraDatum;
+    datumConsumed = false;
+  }
+  
+  @Override
+  public long nextLong() {
+    if (datumConsumed) {
+      return delegate.nextLong();
+    } else {
+      if (delegate.hasNext()) {
+        long delegateNext = delegate.peek();
+        if (extraDatum <= delegateNext) {
+          datumConsumed = true;
+          return extraDatum;
+        } else {
+          return delegate.next();
+        }
+      } else {
+        datumConsumed = true;
+        return extraDatum;
+      }
+    }
+  }
+  
+  @Override
+  public long peek() {
+    if (datumConsumed) {
+      return delegate.peek();
+    } else {
+      if (delegate.hasNext()) {
+        long delegateNext = delegate.peek();
+        if (extraDatum <= delegateNext) {
+          return extraDatum;
+        } else {
+          return delegateNext;
+        }
+      } else {
+        return extraDatum;
+      }
+    }
+  }
+  
+  @Override
+  public boolean hasNext() {
+    return !datumConsumed || delegate.hasNext();
+  }
+  
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+  
+  @Override
+  public void skip(int n) {
+    for (int i = 0; i < n; i++) {
+      nextLong();
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
new file mode 100644
index 0000000..0399618
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
@@ -0,0 +1,758 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.model.AbstractDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.FileLineIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * A {@link DataModel} backed by a delimited file. This class expects a file where each line
+ * contains a user ID, followed by item ID, followed by optional preference value, followed by
+ * optional timestamp. Commas or tabs delimit fields:
+ * </p>
+ *
+ * <p>{@code userID,itemID[,preference[,timestamp]]}</p>
+ *
+ * <p>
+ * Preference value is optional to accommodate applications that have no notion of a
+ * preference value (that is, the user simply expresses a
+ * preference for an item, but no degree of preference).
+ * </p>
+ *
+ * <p>
+ * The preference value is assumed to be parseable as a {@code double}. The user IDs and item IDs are
+ * read parsed as {@code long}s. The timestamp, if present, is assumed to be parseable as a
+ * {@code long}, though this can be overridden via {@link #readTimestampFromString(String)}.
+ * The preference value may be empty, to indicate "no preference value", but cannot be empty. That is,
+ * this is legal:
+ * </p>
+ *
+ * <p>{@code 123,456,,129050099059}</p>
+ *
+ * <p>But this isn't:</p>
+ *
+ * <p>{@code 123,456,129050099059}</p>
+ *
+ * <p>
+ * It is also acceptable for the lines to contain additional fields. Fields beyond the third will be ignored.
+ * An empty line, or one that begins with '#' will be ignored as a comment.
+ * </p>
+ *
+ * <p>
+ * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file
+ * has been reloaded very recently already.
+ * </p>
+ *
+ * <p>
+ * This class will also look for update "delta" files in the same directory, with file names that start the
+ * same way (up to the first period). These files have the same format, and provide updated data that
+ * supersedes what is in the main data file. This is a mechanism that allows an application to push updates to
+ * {@link FileDataModel} without re-copying the entire data file.
+ * </p>
+ *
+ * <p>
+ * One small format difference exists. Update files must also be able to express deletes.
+ * This is done by ending with a blank preference value, as in "123,456,".
+ * </p>
+ *
+ * <p>
+ * Note that it's all-or-nothing -- all of the items in the file must express no preference, or the all must.
+ * These cannot be mixed. Put another way there will always be the same number of delimiters on every line of
+ * the file!
+ * </p>
+ *
+ * <p>
+ * This class is not intended for use with very large amounts of data (over, say, tens of millions of rows).
+ * For that, a JDBC-backed {@link DataModel} and a database are more appropriate.
+ * </p>
+ *
+ * <p>
+ * It is possible and likely useful to subclass this class and customize its behavior to accommodate
+ * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap, FastByIDMap, boolean)} and
+ * {@link #processLineWithoutID(String, FastByIDMap, FastByIDMap)}
+ */
+public class FileDataModel extends AbstractDataModel {
+
+  private static final Logger log = LoggerFactory.getLogger(FileDataModel.class);
+
+  public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
+  private static final char COMMENT_CHAR = '#';
+  private static final char[] DELIMIETERS = {',', '\t'};
+
+  private final File dataFile;
+  private long lastModified;
+  private long lastUpdateFileModified;
+  private final transient Splitter delimiterPattern;
+  private final boolean hasPrefValues;
+  private DataModel delegate;
+  private final ReentrantLock reloadLock;
+  private final boolean transpose;
+  private final long minReloadIntervalMS;
+
+  /**
+   * @param dataFile
+   *          file containing preferences data. If file is compressed (and name ends in .gz or .zip
+   *          accordingly) it will be decompressed as it is read)
+   * @throws FileNotFoundException
+   *           if dataFile does not exist
+   * @throws IOException
+   *           if file can't be read
+   */
+  public FileDataModel(File dataFile) throws IOException {
+    this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+  }
+  
+  /**
+   * @param delimiterRegex If your data file don't use '\t' or ',' as delimiter, you can specify 
+   * a custom regex pattern.
+   */
+  public FileDataModel(File dataFile, String delimiterRegex) throws IOException {
+    this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS, delimiterRegex);
+  }
+  
+  /**
+   * @param transpose
+   *          transposes user IDs and item IDs -- convenient for 'flipping' the data model this way
+   * @param minReloadIntervalMS
+   *  the minimum interval in milliseconds after which a full reload of the original datafile is done
+   *  when refresh() is called
+   * @see #FileDataModel(File)
+   */
+  public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS) throws IOException {
+    this(dataFile, transpose, minReloadIntervalMS, null);
+  }
+  
+  /**
+   * @param delimiterRegex If your data file don't use '\t' or ',' as delimiters, you can specify 
+   * user own using regex pattern.
+   * @throws IOException
+   */
+  public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS, String delimiterRegex)
+    throws IOException {
+
+    this.dataFile = Preconditions.checkNotNull(dataFile.getAbsoluteFile());
+    if (!dataFile.exists() || dataFile.isDirectory()) {
+      throw new FileNotFoundException(dataFile.toString());
+    }
+    Preconditions.checkArgument(dataFile.length() > 0L, "dataFile is empty");
+    Preconditions.checkArgument(minReloadIntervalMS >= 0L, "minReloadIntervalMs must be non-negative");
+
+    log.info("Creating FileDataModel for file {}", dataFile);
+
+    this.lastModified = dataFile.lastModified();
+    this.lastUpdateFileModified = readLastUpdateFileModified();
+
+    FileLineIterator iterator = new FileLineIterator(dataFile, false);
+    String firstLine = iterator.peek();
+    while (firstLine.isEmpty() || firstLine.charAt(0) == COMMENT_CHAR) {
+      iterator.next();
+      firstLine = iterator.peek();
+    }
+    Closeables.close(iterator, true);
+
+    char delimiter;
+    if (delimiterRegex == null) {
+      delimiter = determineDelimiter(firstLine);
+      delimiterPattern = Splitter.on(delimiter);
+    } else {
+      delimiter = '\0';
+      delimiterPattern = Splitter.onPattern(delimiterRegex);
+      if (!delimiterPattern.split(firstLine).iterator().hasNext()) {
+        throw new IllegalArgumentException("Did not find a delimiter(pattern) in first line");
+      }
+    }
+    List<String> firstLineSplit = new ArrayList<>();
+    for (String token : delimiterPattern.split(firstLine)) {
+      firstLineSplit.add(token);
+    }
+    // If preference value exists and isn't empty then the file is specifying pref values
+    hasPrefValues = firstLineSplit.size() >= 3 && !firstLineSplit.get(2).isEmpty();
+
+    this.reloadLock = new ReentrantLock();
+    this.transpose = transpose;
+    this.minReloadIntervalMS = minReloadIntervalMS;
+
+    reload();
+  }
+
+  public File getDataFile() {
+    return dataFile;
+  }
+
+  protected void reload() {
+    if (reloadLock.tryLock()) {
+      try {
+        delegate = buildModel();
+      } catch (IOException ioe) {
+        log.warn("Exception while reloading", ioe);
+      } finally {
+        reloadLock.unlock();
+      }
+    }
+  }
+
+  protected DataModel buildModel() throws IOException {
+
+    long newLastModified = dataFile.lastModified();
+    long newLastUpdateFileModified = readLastUpdateFileModified();
+
+    boolean loadFreshData = delegate == null || newLastModified > lastModified + minReloadIntervalMS;
+
+    long oldLastUpdateFileModifieid = lastUpdateFileModified;
+    lastModified = newLastModified;
+    lastUpdateFileModified = newLastUpdateFileModified;
+
+    FastByIDMap<FastByIDMap<Long>> timestamps = new FastByIDMap<>();
+
+    if (hasPrefValues) {
+
+      if (loadFreshData) {
+
+        FastByIDMap<Collection<Preference>> data = new FastByIDMap<>();
+        FileLineIterator iterator = new FileLineIterator(dataFile, false);
+        processFile(iterator, data, timestamps, false);
+
+        for (File updateFile : findUpdateFilesAfter(newLastModified)) {
+          processFile(new FileLineIterator(updateFile, false), data, timestamps, false);
+        }
+
+        return new GenericDataModel(GenericDataModel.toDataMap(data, true), timestamps);
+
+      } else {
+
+        FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();
+
+        for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
+          processFile(new FileLineIterator(updateFile, false), rawData, timestamps, true);
+        }
+
+        return new GenericDataModel(rawData, timestamps);
+
+      }
+
+    } else {
+
+      if (loadFreshData) {
+
+        FastByIDMap<FastIDSet> data = new FastByIDMap<>();
+        FileLineIterator iterator = new FileLineIterator(dataFile, false);
+        processFileWithoutID(iterator, data, timestamps);
+
+        for (File updateFile : findUpdateFilesAfter(newLastModified)) {
+          processFileWithoutID(new FileLineIterator(updateFile, false), data, timestamps);
+        }
+
+        return new GenericBooleanPrefDataModel(data, timestamps);
+
+      } else {
+
+        FastByIDMap<FastIDSet> rawData = ((GenericBooleanPrefDataModel) delegate).getRawUserData();
+
+        for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
+          processFileWithoutID(new FileLineIterator(updateFile, false), rawData, timestamps);
+        }
+
+        return new GenericBooleanPrefDataModel(rawData, timestamps);
+
+      }
+
+    }
+  }
+
+  /**
+   * Finds update delta files in the same directory as the data file. This finds any file whose name starts
+   * the same way as the data file (up to first period) but isn't the data file itself. For example, if the
+   * data file is /foo/data.txt.gz, you might place update files at /foo/data.1.txt.gz, /foo/data.2.txt.gz,
+   * etc.
+   */
+  private Iterable<File> findUpdateFilesAfter(long minimumLastModified) {
+    String dataFileName = dataFile.getName();
+    int period = dataFileName.indexOf('.');
+    String startName = period < 0 ? dataFileName : dataFileName.substring(0, period);
+    File parentDir = dataFile.getParentFile();
+    Map<Long, File> modTimeToUpdateFile = new TreeMap<>();
+    FileFilter onlyFiles = new FileFilter() {
+      @Override
+      public boolean accept(File file) {
+        return !file.isDirectory();
+      }
+    };
+    for (File updateFile : parentDir.listFiles(onlyFiles)) {
+      String updateFileName = updateFile.getName();
+      if (updateFileName.startsWith(startName)
+          && !updateFileName.equals(dataFileName)
+          && updateFile.lastModified() >= minimumLastModified) {
+        modTimeToUpdateFile.put(updateFile.lastModified(), updateFile);
+      }
+    }
+    return modTimeToUpdateFile.values();
+  }
+
+  private long readLastUpdateFileModified() {
+    long mostRecentModification = Long.MIN_VALUE;
+    for (File updateFile : findUpdateFilesAfter(0L)) {
+      mostRecentModification = Math.max(mostRecentModification, updateFile.lastModified());
+    }
+    return mostRecentModification;
+  }
+
+  public static char determineDelimiter(String line) {
+    for (char possibleDelimieter : DELIMIETERS) {
+      if (line.indexOf(possibleDelimieter) >= 0) {
+        return possibleDelimieter;
+      }
+    }
+    throw new IllegalArgumentException("Did not find a delimiter in first line");
+  }
+
+  protected void processFile(FileLineIterator dataOrUpdateFileIterator,
+                             FastByIDMap<?> data,
+                             FastByIDMap<FastByIDMap<Long>> timestamps,
+                             boolean fromPriorData) {
+    log.info("Reading file info...");
+    int count = 0;
+    while (dataOrUpdateFileIterator.hasNext()) {
+      String line = dataOrUpdateFileIterator.next();
+      if (!line.isEmpty()) {
+        processLine(line, data, timestamps, fromPriorData);
+        if (++count % 1000000 == 0) {
+          log.info("Processed {} lines", count);
+        }
+      }
+    }
+    log.info("Read lines: {}", count);
+  }
+
+  /**
+   * <p>
+   * Reads one line from the input file and adds the data to a {@link FastByIDMap} data structure which maps user IDs
+   * to preferences. This assumes that each line of the input file corresponds to one preference. After
+   * reading a line and determining which user and item the preference pertains to, the method should look to
+   * see if the data contains a mapping for the user ID already, and if not, add an empty data structure of preferences
+   * as appropriate to the data.
+   * </p>
+   *
+   * <p>
+   * Note that if the line is empty or begins with '#' it will be ignored as a comment.
+   * </p>
+   *
+   * @param line
+   *          line from input data file
+   * @param data
+   *          all data read so far, as a mapping from user IDs to preferences
+   * @param fromPriorData an implementation detail -- if true, data will map IDs to
+   *  {@link PreferenceArray} since the framework is attempting to read and update raw
+   *  data that is already in memory. Otherwise it maps to {@link Collection}s of
+   *  {@link Preference}s, since it's reading fresh data. Subclasses must be prepared
+   *  to handle this wrinkle.
+   */
+  protected void processLine(String line,
+                             FastByIDMap<?> data, 
+                             FastByIDMap<FastByIDMap<Long>> timestamps,
+                             boolean fromPriorData) {
+
+    // Ignore empty lines and comments
+    if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
+      return;
+    }
+
+    Iterator<String> tokens = delimiterPattern.split(line).iterator();
+    String userIDString = tokens.next();
+    String itemIDString = tokens.next();
+    String preferenceValueString = tokens.next();
+    boolean hasTimestamp = tokens.hasNext();
+    String timestampString = hasTimestamp ? tokens.next() : null;
+
+    long userID = readUserIDFromString(userIDString);
+    long itemID = readItemIDFromString(itemIDString);
+
+    if (transpose) {
+      long tmp = userID;
+      userID = itemID;
+      itemID = tmp;
+    }
+
+    // This is kind of gross but need to handle two types of storage
+    Object maybePrefs = data.get(userID);
+    if (fromPriorData) {
+      // Data are PreferenceArray
+
+      PreferenceArray prefs = (PreferenceArray) maybePrefs;
+      if (!hasTimestamp && preferenceValueString.isEmpty()) {
+        // Then line is of form "userID,itemID,", meaning remove
+        if (prefs != null) {
+          boolean exists = false;
+          int length = prefs.length();
+          for (int i = 0; i < length; i++) {
+            if (prefs.getItemID(i) == itemID) {
+              exists = true;
+              break;
+            }
+          }
+          if (exists) {
+            if (length == 1) {
+              data.remove(userID);
+            } else {
+              PreferenceArray newPrefs = new GenericUserPreferenceArray(length - 1);
+              for (int i = 0, j = 0; i < length; i++, j++) {
+                if (prefs.getItemID(i) == itemID) {
+                  j--;
+                } else {
+                  newPrefs.set(j, prefs.get(i));
+                }
+              }
+              ((FastByIDMap<PreferenceArray>) data).put(userID, newPrefs);
+            }
+          }
+        }
+
+        removeTimestamp(userID, itemID, timestamps);
+
+      } else {
+
+        float preferenceValue = Float.parseFloat(preferenceValueString);
+
+        boolean exists = false;
+        if (prefs != null) {
+          for (int i = 0; i < prefs.length(); i++) {
+            if (prefs.getItemID(i) == itemID) {
+              exists = true;
+              prefs.setValue(i, preferenceValue);
+              break;
+            }
+          }
+        }
+
+        if (!exists) {
+          if (prefs == null) {
+            prefs = new GenericUserPreferenceArray(1);
+          } else {
+            PreferenceArray newPrefs = new GenericUserPreferenceArray(prefs.length() + 1);
+            for (int i = 0, j = 1; i < prefs.length(); i++, j++) {
+              newPrefs.set(j, prefs.get(i));
+            }
+            prefs = newPrefs;
+          }
+          prefs.setUserID(0, userID);
+          prefs.setItemID(0, itemID);
+          prefs.setValue(0, preferenceValue);
+          ((FastByIDMap<PreferenceArray>) data).put(userID, prefs);          
+        }
+      }
+
+      addTimestamp(userID, itemID, timestampString, timestamps);
+
+    } else {
+      // Data are Collection<Preference>
+
+      Collection<Preference> prefs = (Collection<Preference>) maybePrefs;
+
+      if (!hasTimestamp && preferenceValueString.isEmpty()) {
+        // Then line is of form "userID,itemID,", meaning remove
+        if (prefs != null) {
+          // remove pref
+          Iterator<Preference> prefsIterator = prefs.iterator();
+          while (prefsIterator.hasNext()) {
+            Preference pref = prefsIterator.next();
+            if (pref.getItemID() == itemID) {
+              prefsIterator.remove();
+              break;
+            }
+          }
+        }
+
+        removeTimestamp(userID, itemID, timestamps);
+        
+      } else {
+
+        float preferenceValue = Float.parseFloat(preferenceValueString);
+
+        boolean exists = false;
+        if (prefs != null) {
+          for (Preference pref : prefs) {
+            if (pref.getItemID() == itemID) {
+              exists = true;
+              pref.setValue(preferenceValue);
+              break;
+            }
+          }
+        }
+
+        if (!exists) {
+          if (prefs == null) {
+            prefs = new ArrayList<>(2);
+            ((FastByIDMap<Collection<Preference>>) data).put(userID, prefs);
+          }
+          prefs.add(new GenericPreference(userID, itemID, preferenceValue));
+        }
+
+        addTimestamp(userID, itemID, timestampString, timestamps);
+
+      }
+
+    }
+  }
+
+  protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
+                                      FastByIDMap<FastIDSet> data,
+                                      FastByIDMap<FastByIDMap<Long>> timestamps) {
+    log.info("Reading file info...");
+    int count = 0;
+    while (dataOrUpdateFileIterator.hasNext()) {
+      String line = dataOrUpdateFileIterator.next();
+      if (!line.isEmpty()) {
+        processLineWithoutID(line, data, timestamps);
+        if (++count % 100000 == 0) {
+          log.info("Processed {} lines", count);
+        }
+      }
+    }
+    log.info("Read lines: {}", count);
+  }
+
+  protected void processLineWithoutID(String line,
+                                      FastByIDMap<FastIDSet> data,
+                                      FastByIDMap<FastByIDMap<Long>> timestamps) {
+
+    if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
+      return;
+    }
+
+    Iterator<String> tokens = delimiterPattern.split(line).iterator();
+    String userIDString = tokens.next();
+    String itemIDString = tokens.next();
+    boolean hasPreference = tokens.hasNext();
+    String preferenceValueString = hasPreference ? tokens.next() : "";
+    boolean hasTimestamp = tokens.hasNext();
+    String timestampString = hasTimestamp ? tokens.next() : null;
+
+    long userID = readUserIDFromString(userIDString);
+    long itemID = readItemIDFromString(itemIDString);
+
+    if (transpose) {
+      long tmp = userID;
+      userID = itemID;
+      itemID = tmp;
+    }
+
+    if (hasPreference && !hasTimestamp && preferenceValueString.isEmpty()) {
+      // Then line is of form "userID,itemID,", meaning remove
+
+      FastIDSet itemIDs = data.get(userID);
+      if (itemIDs != null) {
+        itemIDs.remove(itemID);
+      }
+
+      removeTimestamp(userID, itemID, timestamps);
+
+    } else {
+
+      FastIDSet itemIDs = data.get(userID);
+      if (itemIDs == null) {
+        itemIDs = new FastIDSet(2);
+        data.put(userID, itemIDs);
+      }
+      itemIDs.add(itemID);
+
+      addTimestamp(userID, itemID, timestampString, timestamps);
+
+    }
+  }
+
+  private void addTimestamp(long userID,
+                            long itemID,
+                            String timestampString,
+                            FastByIDMap<FastByIDMap<Long>> timestamps) {
+    if (timestampString != null) {
+      FastByIDMap<Long> itemTimestamps = timestamps.get(userID);
+      if (itemTimestamps == null) {
+        itemTimestamps = new FastByIDMap<>();
+        timestamps.put(userID, itemTimestamps);
+      }
+      long timestamp = readTimestampFromString(timestampString);
+      itemTimestamps.put(itemID, timestamp);
+    }
+  }
+
+  private static void removeTimestamp(long userID,
+                                      long itemID,
+                                      FastByIDMap<FastByIDMap<Long>> timestamps) {
+    FastByIDMap<Long> itemTimestamps = timestamps.get(userID);
+    if (itemTimestamps != null) {
+      itemTimestamps.remove(itemID);
+    }
+  }
+
+  /**
+   * Subclasses may wish to override this if ID values in the file are not numeric. This provides a hook by
+   * which subclasses can inject an {@link org.apache.mahout.cf.taste.model.IDMigrator} to perform
+   * translation.
+   */
+  protected long readUserIDFromString(String value) {
+    return Long.parseLong(value);
+  }
+
+  /**
+   * Subclasses may wish to override this if ID values in the file are not numeric. This provides a hook by
+   * which subclasses can inject an {@link org.apache.mahout.cf.taste.model.IDMigrator} to perform
+   * translation.
+   */
+  protected long readItemIDFromString(String value) {
+    return Long.parseLong(value);
+  }
+
+  /**
+   * Subclasses may wish to override this to change how time values in the input file are parsed.
+   * By default they are expected to be numeric, expressing a time as milliseconds since the epoch.
+   */
+  protected long readTimestampFromString(String value) {
+    return Long.parseLong(value);
+  }
+
+  @Override
+  public LongPrimitiveIterator getUserIDs() throws TasteException {
+    return delegate.getUserIDs();
+  }
+
+  @Override
+  public PreferenceArray getPreferencesFromUser(long userID) throws TasteException {
+    return delegate.getPreferencesFromUser(userID);
+  }
+
+  @Override
+  public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+    return delegate.getItemIDsFromUser(userID);
+  }
+
+  @Override
+  public LongPrimitiveIterator getItemIDs() throws TasteException {
+    return delegate.getItemIDs();
+  }
+
+  @Override
+  public PreferenceArray getPreferencesForItem(long itemID) throws TasteException {
+    return delegate.getPreferencesForItem(itemID);
+  }
+
+  @Override
+  public Float getPreferenceValue(long userID, long itemID) throws TasteException {
+    return delegate.getPreferenceValue(userID, itemID);
+  }
+
+  @Override
+  public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+    return delegate.getPreferenceTime(userID, itemID);
+  }
+
+  @Override
+  public int getNumItems() throws TasteException {
+    return delegate.getNumItems();
+  }
+
+  @Override
+  public int getNumUsers() throws TasteException {
+    return delegate.getNumUsers();
+  }
+
+  @Override
+  public int getNumUsersWithPreferenceFor(long itemID) throws TasteException {
+    return delegate.getNumUsersWithPreferenceFor(itemID);
+  }
+
+  @Override
+  public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException {
+    return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2);
+  }
+
+  /**
+   * Note that this method only updates the in-memory preference data that this {@link FileDataModel}
+   * maintains; it does not modify any data on disk. Therefore any updates from this method are only
+   * temporary, and lost when data is reloaded from a file. This method should also be considered relatively
+   * slow.
+   */
+  @Override
+  public void setPreference(long userID, long itemID, float value) throws TasteException {
+    delegate.setPreference(userID, itemID, value);
+  }
+
+  /** See the warning at {@link #setPreference(long, long, float)}. */
+  @Override
+  public void removePreference(long userID, long itemID) throws TasteException {
+    delegate.removePreference(userID, itemID);
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    if (dataFile.lastModified() > lastModified + minReloadIntervalMS
+        || readLastUpdateFileModified() > lastUpdateFileModified + minReloadIntervalMS) {
+      log.debug("File has changed; reloading...");
+      reload();
+    }
+  }
+
+  @Override
+  public boolean hasPreferenceValues() {
+    return delegate.hasPreferenceValues();
+  }
+
+  @Override
+  public float getMaxPreference() {
+    return delegate.getMaxPreference();
+  }
+
+  @Override
+  public float getMinPreference() {
+    return delegate.getMinPreference();
+  }
+
+  @Override
+  public String toString() {
+    return "FileDataModel[dataFile:" + dataFile + ']';
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
new file mode 100644
index 0000000..1bcb4ef
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.model.AbstractIDMigrator;
+import org.apache.mahout.common.iterator.FileLineIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An {@link org.apache.mahout.cf.taste.model.IDMigrator} backed by a file.
+ * This class typically expects a file where each line
+ * contains a single stringID to be stored in this migrator.
+ * </p>
+ *
+ * <p>
+ * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file
+ * has been reloaded very recently already.
+ * </p>
+ */
+public class FileIDMigrator extends AbstractIDMigrator {
+
+  public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
+
+  private final File dataFile;
+  private FastByIDMap<String> longToString;
+  private final ReentrantLock reloadLock;
+
+  private long lastModified;
+  private final long minReloadIntervalMS;
+
+  private static final Logger log = LoggerFactory.getLogger(FileIDMigrator.class);
+
+  public FileIDMigrator(File dataFile) throws FileNotFoundException {
+    this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+  }
+
+  public FileIDMigrator(File dataFile, long minReloadIntervalMS) throws FileNotFoundException {
+    longToString = new FastByIDMap<>(100);
+    this.dataFile = Preconditions.checkNotNull(dataFile);
+    if (!dataFile.exists() || dataFile.isDirectory()) {
+      throw new FileNotFoundException(dataFile.toString());
+    }
+
+    log.info("Creating FileReadonlyIDMigrator for file {}", dataFile);
+
+    this.reloadLock = new ReentrantLock();
+    this.lastModified = dataFile.lastModified();
+    this.minReloadIntervalMS = minReloadIntervalMS;
+
+    reload();
+  }
+
+  @Override
+  public String toStringID(long longID) {
+    return longToString.get(longID);
+  }
+
+  private void reload() {
+    if (reloadLock.tryLock()) {
+      try {
+        longToString = buildMapping();
+      } catch (IOException ioe) {
+        throw new IllegalStateException(ioe);
+      } finally {
+        reloadLock.unlock();
+      }
+    }
+  }
+
+  private FastByIDMap<String> buildMapping() throws IOException {
+    FastByIDMap<String> mapping = new FastByIDMap<>();
+    for (String line : new FileLineIterable(dataFile)) {
+      mapping.put(toLongID(line), line);
+    }
+    lastModified = dataFile.lastModified();
+    return mapping;
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    if (dataFile.lastModified() > lastModified + minReloadIntervalMS) {
+      log.debug("File has changed; reloading...");
+      reload();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "FileIDMigrator[dataFile:" + dataFile + ']';
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
new file mode 100644
index 0000000..8d33f60
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Contains methods and resources useful to all classes in this package.
+ * </p>
+ */
+abstract class AbstractUserNeighborhood implements UserNeighborhood {
+  
+  private final UserSimilarity userSimilarity;
+  private final DataModel dataModel;
+  private final double samplingRate;
+  private final RefreshHelper refreshHelper;
+  
+  AbstractUserNeighborhood(UserSimilarity userSimilarity, DataModel dataModel, double samplingRate) {
+    Preconditions.checkArgument(userSimilarity != null, "userSimilarity is null");
+    Preconditions.checkArgument(dataModel != null, "dataModel is null");
+    Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0, "samplingRate must be in (0,1]");
+    this.userSimilarity = userSimilarity;
+    this.dataModel = dataModel;
+    this.samplingRate = samplingRate;
+    this.refreshHelper = new RefreshHelper(null);
+    this.refreshHelper.addDependency(this.dataModel);
+    this.refreshHelper.addDependency(this.userSimilarity);
+  }
+  
+  final UserSimilarity getUserSimilarity() {
+    return userSimilarity;
+  }
+  
+  final DataModel getDataModel() {
+    return dataModel;
+  }
+  
+  final double getSamplingRate() {
+    return samplingRate;
+  }
+  
+  @Override
+  public final void refresh(Collection<Refreshable> alreadyRefreshed) {
+    refreshHelper.refresh(alreadyRefreshed);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
new file mode 100644
index 0000000..998e476
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.Retriever;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+
+import com.google.common.base.Preconditions;
+
+/** A caching wrapper around an underlying {@link UserNeighborhood} implementation. */
+public final class CachingUserNeighborhood implements UserNeighborhood {
+  
+  private final UserNeighborhood neighborhood;
+  private final Cache<Long,long[]> neighborhoodCache;
+  
+  public CachingUserNeighborhood(UserNeighborhood neighborhood, DataModel dataModel) throws TasteException {
+    Preconditions.checkArgument(neighborhood != null, "neighborhood is null");
+    this.neighborhood = neighborhood;
+    int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing
+    this.neighborhoodCache = new Cache<>(new NeighborhoodRetriever(neighborhood), maxCacheSize);
+  }
+  
+  @Override
+  public long[] getUserNeighborhood(long userID) throws TasteException {
+    return neighborhoodCache.get(userID);
+  }
+  
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    neighborhoodCache.clear();
+    Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(refreshed, neighborhood);
+  }
+  
+  private static final class NeighborhoodRetriever implements Retriever<Long,long[]> {
+    private final UserNeighborhood neighborhood;
+    
+    private NeighborhoodRetriever(UserNeighborhood neighborhood) {
+      this.neighborhood = neighborhood;
+    }
+    
+    @Override
+    public long[] get(Long key) throws TasteException {
+      return neighborhood.getUserNeighborhood(key);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
new file mode 100644
index 0000000..7f3a98a
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.recommender.TopItems;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Computes a neighborhood consisting of the nearest n users to a given user. "Nearest" is defined by the
+ * given {@link UserSimilarity}.
+ * </p>
+ */
+public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
+  
+  private final int n;
+  private final double minSimilarity;
+  
+  /**
+   * @param n neighborhood size; capped at the number of users in the data model
+   * @throws IllegalArgumentException
+   *           if {@code n < 1}, or userSimilarity or dataModel are {@code null}
+   */
+  public NearestNUserNeighborhood(int n, UserSimilarity userSimilarity, DataModel dataModel) throws TasteException {
+    this(n, Double.NEGATIVE_INFINITY, userSimilarity, dataModel, 1.0);
+  }
+  
+  /**
+   * @param n neighborhood size; capped at the number of users in the data model
+   * @param minSimilarity minimal similarity required for neighbors
+   * @throws IllegalArgumentException
+   *           if {@code n < 1}, or userSimilarity or dataModel are {@code null}
+   */
+  public NearestNUserNeighborhood(int n,
+                                  double minSimilarity,
+                                  UserSimilarity userSimilarity,
+                                  DataModel dataModel) throws TasteException {
+    this(n, minSimilarity, userSimilarity, dataModel, 1.0);
+  }
+  
+  /**
+   * @param n neighborhood size; capped at the number of users in the data model
+   * @param minSimilarity minimal similarity required for neighbors
+   * @param samplingRate percentage of users to consider when building neighborhood -- decrease to trade quality for
+   *   performance
+   * @throws IllegalArgumentException
+   *           if {@code n < 1} or samplingRate is NaN or not in (0,1], or userSimilarity or dataModel are
+   *           {@code null}
+   */
+  public NearestNUserNeighborhood(int n,
+                                  double minSimilarity,
+                                  UserSimilarity userSimilarity,
+                                  DataModel dataModel,
+                                  double samplingRate) throws TasteException {
+    super(userSimilarity, dataModel, samplingRate);
+    Preconditions.checkArgument(n >= 1, "n must be at least 1");
+    int numUsers = dataModel.getNumUsers();
+    this.n = n > numUsers ? numUsers : n;
+    this.minSimilarity = minSimilarity;
+  }
+  
+  @Override
+  public long[] getUserNeighborhood(long userID) throws TasteException {
+    
+    DataModel dataModel = getDataModel();
+    UserSimilarity userSimilarityImpl = getUserSimilarity();
+    
+    TopItems.Estimator<Long> estimator = new Estimator(userSimilarityImpl, userID, minSimilarity);
+    
+    LongPrimitiveIterator userIDs = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(),
+      getSamplingRate());
+    
+    return TopItems.getTopUsers(n, userIDs, null, estimator);
+  }
+  
+  @Override
+  public String toString() {
+    return "NearestNUserNeighborhood";
+  }
+  
+  private static final class Estimator implements TopItems.Estimator<Long> {
+    private final UserSimilarity userSimilarityImpl;
+    private final long theUserID;
+    private final double minSim;
+    
+    private Estimator(UserSimilarity userSimilarityImpl, long theUserID, double minSim) {
+      this.userSimilarityImpl = userSimilarityImpl;
+      this.theUserID = theUserID;
+      this.minSim = minSim;
+    }
+    
+    @Override
+    public double estimate(Long userID) throws TasteException {
+      if (userID == theUserID) {
+        return Double.NaN;
+      }
+      double sim = userSimilarityImpl.userSimilarity(theUserID, userID);
+      return sim >= minSim ? sim : Double.NaN;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
new file mode 100644
index 0000000..d5246e4
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Computes a neigbhorhood consisting of all users whose similarity to the given user meets or exceeds a
+ * certain threshold. Similarity is defined by the given {@link UserSimilarity}.
+ * </p>
+ */
+public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
+  
+  private final double threshold;
+  
+  /**
+   * @param threshold
+   *          similarity threshold
+   * @param userSimilarity
+   *          similarity metric
+   * @param dataModel
+   *          data model
+   * @throws IllegalArgumentException
+   *           if threshold is {@link Double#NaN}, or if samplingRate is not positive and less than or equal
+   *           to 1.0, or if userSimilarity or dataModel are {@code null}
+   */
+  public ThresholdUserNeighborhood(double threshold, UserSimilarity userSimilarity, DataModel dataModel) {
+    this(threshold, userSimilarity, dataModel, 1.0);
+  }
+  
+  /**
+   * @param threshold
+   *          similarity threshold
+   * @param userSimilarity
+   *          similarity metric
+   * @param dataModel
+   *          data model
+   * @param samplingRate
+   *          percentage of users to consider when building neighborhood -- decrease to trade quality for
+   *          performance
+   * @throws IllegalArgumentException
+   *           if threshold or samplingRate is {@link Double#NaN}, or if samplingRate is not positive and less
+   *           than or equal to 1.0, or if userSimilarity or dataModel are {@code null}
+   */
+  public ThresholdUserNeighborhood(double threshold,
+                                   UserSimilarity userSimilarity,
+                                   DataModel dataModel,
+                                   double samplingRate) {
+    super(userSimilarity, dataModel, samplingRate);
+    Preconditions.checkArgument(!Double.isNaN(threshold), "threshold must not be NaN");
+    this.threshold = threshold;
+  }
+  
+  @Override
+  public long[] getUserNeighborhood(long userID) throws TasteException {
+    
+    DataModel dataModel = getDataModel();
+    FastIDSet neighborhood = new FastIDSet();
+    LongPrimitiveIterator usersIterable = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel
+        .getUserIDs(), getSamplingRate());
+    UserSimilarity userSimilarityImpl = getUserSimilarity();
+    
+    while (usersIterable.hasNext()) {
+      long otherUserID = usersIterable.next();
+      if (userID != otherUserID) {
+        double theSimilarity = userSimilarityImpl.userSimilarity(userID, otherUserID);
+        if (!Double.isNaN(theSimilarity) && theSimilarity >= threshold) {
+          neighborhood.add(otherUserID);
+        }
+      }
+    }
+    
+    return neighborhood.toArray();
+  }
+  
+  @Override
+  public String toString() {
+    return "ThresholdUserNeighborhood";
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
new file mode 100644
index 0000000..d24ea6a
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
+
+import java.util.Collection;
+
+/**
+ * Abstract base implementation for retrieving candidate items to recommend
+ */
+public abstract class AbstractCandidateItemsStrategy implements CandidateItemsStrategy,
+    MostSimilarItemsCandidateItemsStrategy {
+
+  protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException{
+      return doGetCandidateItems(preferredItemIDs, dataModel, false);
+  }
+  
+  @Override
+  public FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel,
+      boolean includeKnownItems) throws TasteException {
+    return doGetCandidateItems(preferencesFromUser.getIDs(), dataModel, includeKnownItems);
+  }
+  
+  @Override
+  public FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel)
+    throws TasteException {
+    return doGetCandidateItems(itemIDs, dataModel, false);
+  }
+     
+  protected abstract FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel,
+      boolean includeKnownItems) throws TasteException;
+  
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {}
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
new file mode 100644
index 0000000..3a62b08
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+public abstract class AbstractRecommender implements Recommender {
+  
+  private static final Logger log = LoggerFactory.getLogger(AbstractRecommender.class);
+  
+  private final DataModel dataModel;
+  private final CandidateItemsStrategy candidateItemsStrategy;
+  
+  protected AbstractRecommender(DataModel dataModel, CandidateItemsStrategy candidateItemsStrategy) {
+    this.dataModel = Preconditions.checkNotNull(dataModel);
+    this.candidateItemsStrategy = Preconditions.checkNotNull(candidateItemsStrategy);
+  }
+
+  protected AbstractRecommender(DataModel dataModel) {
+    this(dataModel, getDefaultCandidateItemsStrategy());
+  }
+
+  protected static CandidateItemsStrategy getDefaultCandidateItemsStrategy() {
+    return new PreferredItemsNeighborhoodCandidateItemsStrategy();
+  }
+
+
+  /**
+   * <p>
+   * Default implementation which just calls
+   * {@link Recommender#recommend(long, int, org.apache.mahout.cf.taste.recommender.IDRescorer)}, with a
+   * {@link org.apache.mahout.cf.taste.recommender.Rescorer} that does nothing.
+   * </p>
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany) throws TasteException {
+    return recommend(userID, howMany, null, false);
+  }
+
+  /**
+   * <p>
+   * Default implementation which just calls
+   * {@link Recommender#recommend(long, int, org.apache.mahout.cf.taste.recommender.IDRescorer)}, with a
+   * {@link org.apache.mahout.cf.taste.recommender.Rescorer} that does nothing.
+   * </p>
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException {
+    return recommend(userID, howMany, null, includeKnownItems);
+  }
+  
+  /**
+   * <p> Delegates to {@link Recommender#recommend(long, int, IDRescorer, boolean)}
+   */
+  @Override
+  public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException{
+    return recommend(userID, howMany,rescorer, false);  
+  }
+  
+  /**
+   * <p>
+   * Default implementation which just calls {@link DataModel#setPreference(long, long, float)}.
+   * </p>
+   *
+   * @throws IllegalArgumentException
+   *           if userID or itemID is {@code null}, or if value is {@link Double#NaN}
+   */
+  @Override
+  public void setPreference(long userID, long itemID, float value) throws TasteException {
+    Preconditions.checkArgument(!Float.isNaN(value), "NaN value");
+    log.debug("Setting preference for user {}, item {}", userID, itemID);
+    dataModel.setPreference(userID, itemID, value);
+  }
+  
+  /**
+   * <p>
+   * Default implementation which just calls {@link DataModel#removePreference(long, long)} (Object, Object)}.
+   * </p>
+   *
+   * @throws IllegalArgumentException
+   *           if userID or itemID is {@code null}
+   */
+  @Override
+  public void removePreference(long userID, long itemID) throws TasteException {
+    log.debug("Remove preference for user '{}', item '{}'", userID, itemID);
+    dataModel.removePreference(userID, itemID);
+  }
+  
+  @Override
+  public DataModel getDataModel() {
+    return dataModel;
+  }
+
+  /**
+   * @param userID
+   *          ID of user being evaluated
+   * @param preferencesFromUser
+   *          the preferences from the user
+   * @param includeKnownItems
+   *          whether to include items already known by the user in recommendations
+   * @return all items in the {@link DataModel} for which the user has not expressed a preference and could
+   *         possibly be recommended to the user
+   * @throws TasteException
+   *           if an error occurs while listing items
+   */
+  protected FastIDSet getAllOtherItems(long userID, PreferenceArray preferencesFromUser, boolean includeKnownItems)
+      throws TasteException {
+    return candidateItemsStrategy.getCandidateItems(userID, preferencesFromUser, dataModel, includeKnownItems);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..37389a7
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import com.google.common.base.Preconditions;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+
+/**
+ * returns the result of {@link ItemSimilarity#allSimilarItemIDs(long)} as candidate items
+ */
+public class AllSimilarItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+  private final ItemSimilarity similarity;
+
+  public AllSimilarItemsCandidateItemsStrategy(ItemSimilarity similarity) {
+    Preconditions.checkArgument(similarity != null, "similarity is null");
+    this.similarity = similarity;
+  }
+
+  @Override
+  protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+    throws TasteException {
+    FastIDSet candidateItemIDs = new FastIDSet();
+    for (long itemID : preferredItemIDs) {
+      candidateItemIDs.addAll(similarity.allSimilarItemIDs(itemID));
+    }
+    if (!includeKnownItems) {
+      candidateItemIDs.removeAll(preferredItemIDs);
+    }
+    return candidateItemIDs;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..929eddd
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+public final class AllUnknownItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy {
+
+  /** return all items the user has not yet seen */
+  @Override
+  protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems)
+    throws TasteException {
+    FastIDSet possibleItemIDs = new FastIDSet(dataModel.getNumItems());
+    LongPrimitiveIterator allItemIDs = dataModel.getItemIDs();
+    while (allItemIDs.hasNext()) {
+      possibleItemIDs.add(allItemIDs.nextLong());
+    }
+    if (!includeKnownItems) {
+      possibleItemIDs.removeAll(preferredItemIDs);
+    }
+    return possibleItemIDs;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
new file mode 100644
index 0000000..1677ea8
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+/**
+ * <p>
+ * Defines ordering on {@link RecommendedItem} by the rescored value of the recommendations' estimated
+ * preference value, from high to low.
+ * </p>
+ */
+final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
+  
+  private final IDRescorer rescorer;
+  
+  ByRescoreComparator(IDRescorer rescorer) {
+    this.rescorer = rescorer;
+  }
+  
+  @Override
+  public int compare(RecommendedItem o1, RecommendedItem o2) {
+    double rescored1;
+    double rescored2;
+    if (rescorer == null) {
+      rescored1 = o1.getValue();
+      rescored2 = o2.getValue();
+    } else {
+      rescored1 = rescorer.rescore(o1.getItemID(), o1.getValue());
+      rescored2 = rescorer.rescore(o2.getItemID(), o2.getValue());
+    }
+    if (rescored1 < rescored2) {
+      return 1;
+    } else if (rescored1 > rescored2) {
+      return -1;
+    } else {
+      return 0;
+    }
+  }
+  
+  @Override
+  public String toString() {
+    return "ByRescoreComparator[rescorer:" + rescorer + ']';
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
new file mode 100644
index 0000000..57c5f3d
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+/**
+ * Defines a natural ordering from most-preferred item (highest value) to least-preferred.
+ */
+public final class ByValueRecommendedItemComparator implements Comparator<RecommendedItem>, Serializable {
+
+  private static final Comparator<RecommendedItem> INSTANCE = new ByValueRecommendedItemComparator();
+
+  public static Comparator<RecommendedItem> getInstance() {
+    return INSTANCE;
+  }
+
+  @Override
+  public int compare(RecommendedItem o1, RecommendedItem o2) {
+    float value1 = o1.getValue();
+    float value2 = o2.getValue();
+    return value1 > value2 ? -1 : value1 < value2 ? 1 : 0;
+  }
+
+}