You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/28 14:54:45 UTC

[17/51] [partial] mahout git commit: NO-JIRA Clean up MR refactor

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
new file mode 100644
index 0000000..8ea1660
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.common.Weighting;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An implementation of the Pearson correlation. For users X and Y, the following values are calculated:
+ * </p>
+ *
+ * <ul>
+ * <li>sumX2: sum of the square of all X's preference values</li>
+ * <li>sumY2: sum of the square of all Y's preference values</li>
+ * <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and Y express a
+ * preference</li>
+ * </ul>
+ *
+ * <p>
+ * The correlation is then:
+ * </p>
+ *
+ * <p>
+ * {@code sumXY / sqrt(sumX2 * sumY2)}
+ * </p>
+ *
+ * <p>
+ * Note that this correlation "centers" its data; that is, it shifts each user's preference values so that
+ * their mean is 0. This is necessary to achieve expected behavior on all data sets.
+ * </p>
+ *
+ * <p>
+ * This correlation implementation is equivalent to the cosine similarity since the data it receives
+ * is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle
+ * between the two vectors defined by the users' preference values.
+ * </p>
+ *
+ * <p>
+ * For cosine similarity on uncentered data, see {@link UncenteredCosineSimilarity}.
+ * </p> 
+ */
+public final class PearsonCorrelationSimilarity extends AbstractSimilarity {
+
+  /**
+   * Builds the similarity with {@link Weighting#UNWEIGHTED}.
+   *
+   * @param dataModel model supplying the preference data
+   * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+   * @throws TasteException declared by the delegated-to constructor
+   */
+  public PearsonCorrelationSimilarity(DataModel dataModel) throws TasteException {
+    this(dataModel, Weighting.UNWEIGHTED);
+  }
+
+  /**
+   * Builds the similarity with an explicit weighting.
+   *
+   * @param dataModel model supplying the preference data
+   * @param weighting weighting handed on to the superclass
+   * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+   * @throws TasteException declared by the superclass constructor
+   */
+  public PearsonCorrelationSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
+    // NOTE(review): the third argument is presumably the "center data" flag of AbstractSimilarity -- confirm.
+    super(dataModel, weighting, true);
+    boolean hasValues = dataModel.hasPreferenceValues();
+    Preconditions.checkArgument(hasValues, "DataModel doesn't have preference values");
+  }
+
+  /**
+   * Turns the accumulated sums into {@code sumXY / (sqrt(sumX2) * sqrt(sumY2))}.
+   *
+   * @return the quotient above, or {@link Double#NaN} when {@code n} is 0 or the denominator is 0
+   */
+  @Override
+  double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
+    if (n == 0) {
+      return Double.NaN;
+    }
+    // The sums of X and of Y are absent from the formula because the data is assumed to be
+    // centered, i.e. both sums are taken to be 0.
+    double normX = Math.sqrt(sumX2);
+    double normY = Math.sqrt(sumY2);
+    double norm = normX * normY;
+    // A zero norm means one or both parties rated everything identically, so this measure
+    // cannot say anything about their similarity.
+    return norm == 0.0 ? Double.NaN : sumXY / norm;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
new file mode 100644
index 0000000..1116368
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of
+ * preference values themselves. That is, each user's preferences are sorted and then assigned a rank as their
+ * preference value, with 1 being assigned to the least preferred item.
+ * </p>
+ */
+public final class SpearmanCorrelationSimilarity implements UserSimilarity {
+  
+  private final DataModel dataModel;
+  
+  public SpearmanCorrelationSimilarity(DataModel dataModel) {
+    this.dataModel = Preconditions.checkNotNull(dataModel);
+  }
+  
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws TasteException {
+    PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1);
+    PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2);
+    int xLength = xPrefs.length();
+    int yLength = yPrefs.length();
+    
+    if (xLength <= 1 || yLength <= 1) {
+      return Double.NaN;
+    }
+    
+    // Copy prefs since we need to modify pref values to ranks
+    xPrefs = xPrefs.clone();
+    yPrefs = yPrefs.clone();
+    
+    // First sort by values from low to high
+    xPrefs.sortByValue();
+    yPrefs.sortByValue();
+    
+    // Assign ranks from low to high
+    float nextRank = 1.0f;
+    for (int i = 0; i < xLength; i++) {
+      // ... but only for items that are common to both pref arrays
+      if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) {
+        xPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
+      // Other values are bogus but don't matter
+    }
+    nextRank = 1.0f;
+    for (int i = 0; i < yLength; i++) {
+      if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) {
+        yPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
+    }
+    
+    xPrefs.sortByItem();
+    yPrefs.sortByItem();
+    
+    long xIndex = xPrefs.getItemID(0);
+    long yIndex = yPrefs.getItemID(0);
+    int xPrefIndex = 0;
+    int yPrefIndex = 0;
+    
+    double sumXYRankDiff2 = 0.0;
+    int count = 0;
+    
+    while (true) {
+      int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
+      if (compare == 0) {
+        double diff = xPrefs.getValue(xPrefIndex) - yPrefs.getValue(yPrefIndex);
+        sumXYRankDiff2 += diff * diff;
+        count++;
+      }
+      if (compare <= 0) {
+        if (++xPrefIndex >= xLength) {
+          break;
+        }
+        xIndex = xPrefs.getItemID(xPrefIndex);
+      }
+      if (compare >= 0) {
+        if (++yPrefIndex >= yLength) {
+          break;
+        }
+        yIndex = yPrefs.getItemID(yPrefIndex);
+      }
+    }
+    
+    if (count <= 1) {
+      return Double.NaN;
+    }
+    
+    // When ranks are unique, this formula actually gives the Pearson correlation
+    return 1.0 - 6.0 * sumXYRankDiff2 / (count * (count * count - 1));
+  }
+  
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+  
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
new file mode 100644
index 0000000..0c3a0a4
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * <p>
+ * An implementation of a "similarity" based on the <a
+ * href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">
+ * Tanimoto coefficient</a>, or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard
+ * coefficient</a>.
+ * </p>
+ * 
+ * <p>
+ * This is intended for "binary" data sets where a user either expresses a generic "yes" preference for an
+ * item or has no preference. The actual preference values do not matter here, only their presence or absence.
+ * </p>
+ * 
+ * <p>
+ * The value returned is in [0,1].
+ * </p>
+ */
+public final class TanimotoCoefficientSimilarity extends AbstractItemSimilarity implements UserSimilarity {
+
+  public TanimotoCoefficientSimilarity(DataModel dataModel) {
+    super(dataModel);
+  }
+  
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+  
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws TasteException {
+
+    DataModel dataModel = getDataModel();
+    FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
+    FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);
+
+    int xPrefsSize = xPrefs.size();
+    int yPrefsSize = yPrefs.size();
+    if (xPrefsSize == 0 && yPrefsSize == 0) {
+      return Double.NaN;
+    }
+    if (xPrefsSize == 0 || yPrefsSize == 0) {
+      return 0.0;
+    }
+    
+    int intersectionSize =
+        xPrefsSize < yPrefsSize ? yPrefs.intersectionSize(xPrefs) : xPrefs.intersectionSize(yPrefs);
+    if (intersectionSize == 0) {
+      return Double.NaN;
+    }
+    
+    int unionSize = xPrefsSize + yPrefsSize - intersectionSize;
+    
+    return (double) intersectionSize / (double) unionSize;
+  }
+  
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1);
+    return doItemSimilarity(itemID1, itemID2, preferring1);
+  }
+
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1);
+    int length = itemID2s.length;
+    double[] result = new double[length];
+    for (int i = 0; i < length; i++) {
+      result[i] = doItemSimilarity(itemID1, itemID2s[i], preferring1);
+    }
+    return result;
+  }
+
+  private double doItemSimilarity(long itemID1, long itemID2, int preferring1) throws TasteException {
+    DataModel dataModel = getDataModel();
+    int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2);
+    if (preferring1and2 == 0) {
+      return Double.NaN;
+    }
+    int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
+    return (double) preferring1and2 / (double) (preferring1 + preferring2 - preferring1and2);
+  }
+  
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(alreadyRefreshed, getDataModel());
+  }
+  
+  @Override
+  public String toString() {
+    return "TanimotoCoefficientSimilarity[dataModel:" + getDataModel() + ']';
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
new file mode 100644
index 0000000..6260606
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.common.Weighting;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An implementation of the cosine similarity. The result is the cosine of the angle formed between
+ * the two preference vectors.
+ * </p>
+ *
+ * <p>
+ * Note that this similarity does not "center" its data; that is, it does not shift each user's preference
+ * values so that their mean is 0. For this behavior, use {@link PearsonCorrelationSimilarity}, which actually
+ * is mathematically equivalent for centered data.
+ * </p>
+ */
+public final class UncenteredCosineSimilarity extends AbstractSimilarity {
+
+  /**
+   * Builds the similarity with {@link Weighting#UNWEIGHTED}.
+   *
+   * @param dataModel model supplying the preference data
+   * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+   * @throws TasteException declared by the delegated-to constructor
+   */
+  public UncenteredCosineSimilarity(DataModel dataModel) throws TasteException {
+    this(dataModel, Weighting.UNWEIGHTED);
+  }
+
+  /**
+   * Builds the similarity with an explicit weighting.
+   *
+   * @param dataModel model supplying the preference data
+   * @param weighting weighting handed on to the superclass
+   * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+   * @throws TasteException declared by the superclass constructor
+   */
+  public UncenteredCosineSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
+    // NOTE(review): the third argument is presumably the "center data" flag of AbstractSimilarity -- confirm.
+    super(dataModel, weighting, false);
+    boolean hasValues = dataModel.hasPreferenceValues();
+    Preconditions.checkArgument(hasValues, "DataModel doesn't have preference values");
+  }
+
+  /**
+   * Turns the accumulated sums into {@code sumXY / (sqrt(sumX2) * sqrt(sumY2))}.
+   *
+   * @return the quotient above, or {@link Double#NaN} when {@code n} is 0 or the denominator is 0
+   */
+  @Override
+  double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
+    if (n == 0) {
+      return Double.NaN;
+    }
+    double normX = Math.sqrt(sumX2);
+    double normY = Math.sqrt(sumY2);
+    double norm = normX * normY;
+    // The product is zero when at least one of the square roots is zero (or the product underflows);
+    // the cosine is undefined in that case.
+    return norm == 0.0 ? Double.NaN : sumXY / norm;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
new file mode 100644
index 0000000..1ae45c2
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * {@link Iterable} to be able to read a file linewise into a {@link GenericItemSimilarity}
+ */
+final class FileItemItemSimilarityIterable implements Iterable<GenericItemSimilarity.ItemItemSimilarity> {
+
+  /** File handed to every iterator created by {@link #iterator()}. */
+  private final File similaritiesFile;
+
+  /**
+   * @param similaritiesFile file to read similarities from; stored as given, not validated here
+   */
+  FileItemItemSimilarityIterable(File similaritiesFile) {
+    this.similaritiesFile = similaritiesFile;
+  }
+
+  /**
+   * Creates a new {@link FileItemItemSimilarityIterator} over the file on each call.
+   *
+   * @throws IllegalStateException wrapping the {@link IOException} if the iterator cannot be created
+   */
+  @Override
+  public Iterator<GenericItemSimilarity.ItemItemSimilarity> iterator() {
+    Iterator<GenericItemSimilarity.ItemItemSimilarity> similarities;
+    try {
+      similarities = new FileItemItemSimilarityIterator(similaritiesFile);
+    } catch (IOException cause) {
+      // Iterable.iterator() cannot declare checked exceptions, so rethrow unchecked with the cause kept.
+      throw new IllegalStateException("Can't read " + similaritiesFile, cause);
+    }
+    return similarities;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
new file mode 100644
index 0000000..c071159
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ForwardingIterator;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+import org.apache.mahout.common.iterator.FileLineIterator;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.regex.Pattern;
+
+/**
+ * a simple iterator using a {@link FileLineIterator} internally, parsing each
+ * line into an {@link GenericItemSimilarity.ItemItemSimilarity}.
+ */
+final class FileItemItemSimilarityIterator extends ForwardingIterator<GenericItemSimilarity.ItemItemSimilarity> {
+
+  /** Fields on a line are delimited by either a comma or a tab. */
+  private static final Pattern SEPARATOR = Pattern.compile("[,\t]");
+
+  private final Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate;
+
+  /**
+   * @param similaritiesFile file whose lines are parsed into similarities as they are iterated
+   * @throws IOException declared for the creation of the underlying {@link FileLineIterator}
+   */
+  FileItemItemSimilarityIterator(File similaritiesFile) throws IOException {
+    delegate = Iterators.transform(new FileLineIterator(similaritiesFile), new LineParser());
+  }
+
+  @Override
+  protected Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate() {
+    return delegate;
+  }
+
+  /**
+   * Parses one line: the first two fields as {@code long} item IDs, the third as the {@code double}
+   * similarity value. Any further fields are ignored.
+   */
+  private static final class LineParser implements Function<String, GenericItemSimilarity.ItemItemSimilarity> {
+    @Override
+    public GenericItemSimilarity.ItemItemSimilarity apply(String line) {
+      String[] fields = SEPARATOR.split(line);
+      long firstItemID = Long.parseLong(fields[0]);
+      long secondItemID = Long.parseLong(fields[1]);
+      double similarity = Double.parseDouble(fields[2]);
+      return new GenericItemSimilarity.ItemItemSimilarity(firstItemID, secondItemID, similarity);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
new file mode 100644
index 0000000..712b96a
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An {@link ItemSimilarity} backed by a comma-delimited file. This class typically expects a file where each line
+ * contains an item ID, followed by another item ID, followed by a similarity value, separated by commas. You may also
+ * use tabs.
+ * </p>
+ *
+ * <p>
+ * The similarity value is assumed to be parseable as a {@code double} having a value between -1 and 1. The
+ * item IDs are parsed as {@code long}s. Similarities are symmetric so for a pair of items you do not have to
+ * include 2 lines in the file.
+ * </p>
+ *
+ * <p>
+ * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file
+ * has been reloaded very recently already.
+ * </p>
+ *
+ * <p>
+ * This class is not intended for use with very large amounts of data. For that, a JDBC-backed {@link ItemSimilarity}
+ * and a database are more appropriate.
+ * </p>
+ */
+public class FileItemSimilarity implements ItemSimilarity {
+
+  /** Default value for the minimum reload interval: one minute, in milliseconds. */
+  public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L;
+
+  /** Similarity built from the file contents; replaced by each successful {@link #reload()}. */
+  private ItemSimilarity delegate;
+  /** Held while the file is being loaded. */
+  private final ReentrantLock reloadLock;
+  private final File dataFile;
+  /** Modification time of the file as recorded at construction or at the last reload. */
+  private long lastModified;
+  private final long minReloadIntervalMS;
+
+  private static final Logger log = LoggerFactory.getLogger(FileItemSimilarity.class);
+
+  /**
+   * Creates the similarity using {@link #DEFAULT_MIN_RELOAD_INTERVAL_MS}.
+   *
+   * @param dataFile
+   *          file containing the similarity data
+   */
+  public FileItemSimilarity(File dataFile) {
+    this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+  }
+
+  /**
+   * Validates the file and loads it immediately.
+   *
+   * @param dataFile
+   *          file containing the similarity data; must exist and must not be a directory
+   * @param minReloadIntervalMS
+   *          the minimum interval in milliseconds after which a full reload of the original datafile is done
+   *          when refresh() is called
+   * @throws IllegalArgumentException if {@code dataFile} is null, missing, or a directory
+   * @see #FileItemSimilarity(File)
+   */
+  public FileItemSimilarity(File dataFile, long minReloadIntervalMS) {
+    Preconditions.checkArgument(dataFile != null, "dataFile is null");
+    boolean isRegularEntry = dataFile.exists() && !dataFile.isDirectory();
+    Preconditions.checkArgument(isRegularEntry, "dataFile is missing or a directory: %s", dataFile);
+
+    log.info("Creating FileItemSimilarity for file {}", dataFile);
+
+    this.dataFile = dataFile.getAbsoluteFile();
+    this.lastModified = dataFile.lastModified();
+    this.minReloadIntervalMS = minReloadIntervalMS;
+    this.reloadLock = new ReentrantLock();
+
+    reload();
+  }
+
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    return delegate.itemSimilarities(itemID1, itemID2s);
+  }
+
+  @Override
+  public long[] allSimilarItemIDs(long itemID) throws TasteException {
+    return delegate.allSimilarItemIDs(itemID);
+  }
+
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    return delegate.itemSimilarity(itemID1, itemID2);
+  }
+
+  /**
+   * Reloads the file if its modification time is more than the minimum reload interval past the recorded
+   * one. The {@code alreadyRefreshed} argument is not consulted.
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    boolean changedLongEnoughAgo = dataFile.lastModified() > lastModified + minReloadIntervalMS;
+    if (!changedLongEnoughAgo) {
+      return;
+    }
+    log.debug("File has changed; reloading...");
+    reload();
+  }
+
+  /**
+   * Rebuilds the delegate from the data file. Does nothing if the reload lock cannot be acquired
+   * immediately.
+   */
+  protected void reload() {
+    if (!reloadLock.tryLock()) {
+      return;
+    }
+    try {
+      // Read the timestamp before loading; it is only recorded if building the delegate succeeds.
+      long timestampBeforeLoad = dataFile.lastModified();
+      delegate = new GenericItemSimilarity(new FileItemItemSimilarityIterable(dataFile));
+      lastModified = timestampBeforeLoad;
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "FileItemSimilarity[dataFile:" + dataFile + ']';
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
new file mode 100644
index 0000000..631ec9b
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+
+/**
+ * Persist the precomputed item similarities to a file that can later be used
+ * by a {@link org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity}
+ */
+public class FileSimilarItemsWriter implements SimilarItemsWriter {
+
+  /** Destination file; opened by {@link #open()}. */
+  private final File file;
+  /** Unset until {@link #open()} has been called. */
+  private BufferedWriter writer;
+
+  /**
+   * @param file destination for the similarity lines; not opened until {@link #open()} is called
+   */
+  public FileSimilarItemsWriter(File file) {
+    this.file = file;
+  }
+
+  /**
+   * Opens the destination file for writing as UTF-8 text.
+   */
+  @Override
+  public void open() throws IOException {
+    FileOutputStream rawOutput = new FileOutputStream(file);
+    writer = new BufferedWriter(new OutputStreamWriter(rawOutput, Charsets.UTF_8));
+  }
+
+  /**
+   * Writes one {@code itemID,similarItemID,similarity} line per similar item.
+   */
+  @Override
+  public void add(SimilarItems similarItems) throws IOException {
+    // The source item ID is the same on every line, so it is converted only once.
+    String sourceItemID = String.valueOf(similarItems.getItemID());
+    for (SimilarItem neighbor : similarItems.getSimilarItems()) {
+      String neighborItemID = String.valueOf(neighbor.getItemID());
+      String similarity = String.valueOf(neighbor.getSimilarity());
+      writer.write(sourceItemID);
+      writer.write(',');
+      writer.write(neighborItemID);
+      writer.write(',');
+      writer.write(similarity);
+      writer.newLine();
+    }
+  }
+
+  /**
+   * Closes the writer; an {@link IOException} raised while closing is propagated, not swallowed.
+   */
+  @Override
+  public void close() throws IOException {
+    Closeables.close(writer, false);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
new file mode 100644
index 0000000..b7b52cf
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Precompute item similarities in parallel on a single machine. The recommender given to this class must use a
+ * DataModel that holds the interactions in memory (such as
+ * {@link org.apache.mahout.cf.taste.impl.model.GenericDataModel} or
+ * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}) as fast random access to the data is required
+ *
+ * <p>
+ * The item IDs are split into batches that are put into a queue up front. A pool of worker threads drains that
+ * queue and computes the similar items of every batch, while one additional thread hands the results to the
+ * {@link SimilarItemsWriter}. A failure in any of these threads makes
+ * {@link #computeItemSimilarities(int, int, SimilarItemsWriter)} fail instead of silently returning a partial
+ * result.
+ * </p>
+ */
+public class MultithreadedBatchItemSimilarities extends BatchItemSimilarities {
+
+  private final int batchSize;
+
+  private static final int DEFAULT_BATCH_SIZE = 100;
+
+  private static final Logger log = LoggerFactory.getLogger(MultithreadedBatchItemSimilarities.class);
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) {
+    this(recommender, similarItemsPerItem, DEFAULT_BATCH_SIZE);
+  }
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   * @param batchSize size of item batches sent to worker threads, must be positive
+   * @throws IllegalArgumentException if batchSize is not positive
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem, int batchSize) {
+    super(recommender, similarItemsPerItem);
+    // a non-positive batch size would only fail later with a division by zero or a negative array size
+    if (batchSize < 1) {
+      throw new IllegalArgumentException("batchSize must be positive but was " + batchSize);
+    }
+    this.batchSize = batchSize;
+  }
+
+  /**
+   * Computes the similar items of all items of the recommender's data model and hands them to the writer.
+   *
+   * @param degreeOfParallelism number of worker threads computing similarities
+   * @param maxDurationInHours maximum time to wait for the computation to finish
+   * @param writer receives the computed similar items; it is opened and closed by this method
+   * @return the number of similarities handed to the writer
+   * @throws IOException if the computation cannot be set up, or if a worker or the output thread failed
+   * @throws RuntimeException if the computation does not finish in time or the calling thread is interrupted
+   */
+  @Override
+  public int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, SimilarItemsWriter writer)
+    throws IOException {
+
+    ExecutorService executorService = Executors.newFixedThreadPool(degreeOfParallelism + 1);
+
+    // collects exceptions raised in the pool threads so that they can be reported to the caller
+    BlockingQueue<Throwable> failures = new LinkedBlockingQueue<>();
+
+    Output output = null;
+    try {
+      writer.open();
+
+      DataModel dataModel = getRecommender().getDataModel();
+
+      BlockingQueue<long[]> itemsIDsInBatches = queueItemIDsInBatches(dataModel, batchSize, degreeOfParallelism);
+      BlockingQueue<List<SimilarItems>> results = new LinkedBlockingQueue<>();
+
+      AtomicInteger numActiveWorkers = new AtomicInteger(degreeOfParallelism);
+      for (int n = 0; n < degreeOfParallelism; n++) {
+        executorService.execute(new SimilarItemsWorker(n, itemsIDsInBatches, results, numActiveWorkers, failures));
+      }
+
+      output = new Output(results, writer, numActiveWorkers, failures);
+      executorService.execute(output);
+
+    } catch (Exception e) {
+      throw new IOException(e);
+    } finally {
+      executorService.shutdown();
+      try {
+        boolean succeeded = executorService.awaitTermination(maxDurationInHours, TimeUnit.HOURS);
+        if (!succeeded) {
+          // do not leave pool threads running behind the caller's back
+          executorService.shutdownNow();
+          throw new RuntimeException("Unable to complete the computation in " + maxDurationInHours + " hours!");
+        }
+      } catch (InterruptedException e) {
+        executorService.shutdownNow();
+        // restore the interrupt flag for code further up the stack
+        Thread.currentThread().interrupt();
+        throw new RuntimeException(e);
+      } finally {
+        // close the writer on every path, including timeout and interruption
+        Closeables.close(writer, false);
+      }
+    }
+
+    Throwable failure = failures.peek();
+    if (failure != null) {
+      throw new IOException("Computation of item similarities failed", failure);
+    }
+
+    return output.getNumSimilaritiesProcessed();
+  }
+
+  /**
+   * Splits all item IDs of the data model into batches of at most batchSize IDs.
+   *
+   * @return a queue holding all batches
+   * @throws IllegalStateException if there are fewer batches than worker threads
+   */
+  private static BlockingQueue<long[]> queueItemIDsInBatches(DataModel dataModel, int batchSize,
+                                                             int degreeOfParallelism)
+      throws TasteException {
+
+    LongPrimitiveIterator itemIDs = dataModel.getItemIDs();
+    int numItems = dataModel.getNumItems();
+
+    BlockingQueue<long[]> itemIDBatches = new LinkedBlockingQueue<>((numItems / batchSize) + 1);
+
+    long[] batch = new long[batchSize];
+    int pos = 0;
+    while (itemIDs.hasNext()) {
+      batch[pos] = itemIDs.nextLong();
+      pos++;
+      if (pos == batchSize) {
+        // the buffer is reused for the next batch, so queue a copy
+        itemIDBatches.add(batch.clone());
+        pos = 0;
+      }
+    }
+
+    // queue the remaining IDs that did not fill a complete batch
+    if (pos > 0) {
+      long[] lastBatch = new long[pos];
+      System.arraycopy(batch, 0, lastBatch, 0, pos);
+      itemIDBatches.add(lastBatch);
+    }
+
+    if (itemIDBatches.size() < degreeOfParallelism) {
+      throw new IllegalStateException("Degree of parallelism [" + degreeOfParallelism + "] " +
+          " is larger than number of batches [" + itemIDBatches.size() +"].");
+    }
+
+    log.info("Queued {} items in {} batches", numItems, itemIDBatches.size());
+
+    return itemIDBatches;
+  }
+
+
+  /**
+   * Drains the result queue and passes the results to the writer until all workers are done and the queue is
+   * empty.
+   */
+  private static class Output implements Runnable {
+
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final SimilarItemsWriter writer;
+    private final AtomicInteger numActiveWorkers;
+    private final BlockingQueue<Throwable> failures;
+    // written by the output thread only, read by the calling thread after the pool has terminated
+    private volatile int numSimilaritiesProcessed = 0;
+
+    Output(BlockingQueue<List<SimilarItems>> results, SimilarItemsWriter writer, AtomicInteger numActiveWorkers,
+        BlockingQueue<Throwable> failures) {
+      this.results = results;
+      this.writer = writer;
+      this.numActiveWorkers = numActiveWorkers;
+      this.failures = failures;
+    }
+
+    private int getNumSimilaritiesProcessed() {
+      return numSimilaritiesProcessed;
+    }
+
+    @Override
+    public void run() {
+      try {
+        while (numActiveWorkers.get() != 0 || !results.isEmpty()) {
+          List<SimilarItems> similarItemsOfABatch = results.poll(10, TimeUnit.MILLISECONDS);
+          if (similarItemsOfABatch != null) {
+            for (SimilarItems similarItems : similarItemsOfABatch) {
+              writer.add(similarItems);
+              numSimilaritiesProcessed += similarItems.numSimilarItems();
+            }
+          }
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        failures.offer(e);
+      } catch (Exception e) {
+        // report the failure to the caller, the workers stop as soon as they notice it
+        log.error("writing the similar items failed", e);
+        failures.offer(e);
+      }
+    }
+  }
+
+  /**
+   * Takes batches of item IDs from the queue and computes the similar items of every item of a batch.
+   */
+  private class SimilarItemsWorker implements Runnable {
+
+    private final int number;
+    private final BlockingQueue<long[]> itemIDBatches;
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final AtomicInteger numActiveWorkers;
+    private final BlockingQueue<Throwable> failures;
+
+    SimilarItemsWorker(int number, BlockingQueue<long[]> itemIDBatches, BlockingQueue<List<SimilarItems>> results,
+        AtomicInteger numActiveWorkers, BlockingQueue<Throwable> failures) {
+      this.number = number;
+      this.itemIDBatches = itemIDBatches;
+      this.results = results;
+      this.numActiveWorkers = numActiveWorkers;
+      this.failures = failures;
+    }
+
+    @Override
+    public void run() {
+
+      int numBatchesProcessed = 0;
+      try {
+        long[] itemIDBatch;
+        // All batches are queued before the workers start, so a non-blocking poll() is sufficient. Checking
+        // isEmpty() and then calling take() could block forever if another worker grabbed the last batch in
+        // between.
+        while (failures.isEmpty() && !Thread.currentThread().isInterrupted()
+            && (itemIDBatch = itemIDBatches.poll()) != null) {
+
+          List<SimilarItems> similarItemsOfBatch = new ArrayList<>(itemIDBatch.length);
+          for (long itemID : itemIDBatch) {
+            List<RecommendedItem> similarItems = getRecommender().mostSimilarItems(itemID, getSimilarItemsPerItem());
+            similarItemsOfBatch.add(new SimilarItems(itemID, similarItems));
+          }
+
+          results.offer(similarItemsOfBatch);
+
+          if (++numBatchesProcessed % 5 == 0) {
+            log.info("worker {} processed {} batches", number, numBatchesProcessed);
+          }
+        }
+        log.info("worker {} processed {} batches. done.", number, numBatchesProcessed);
+      } catch (Exception e) {
+        log.error("worker " + number + " failed", e);
+        failures.offer(e);
+      } finally {
+        // always sign off, otherwise the output thread would wait for this worker until the timeout
+        numActiveWorkers.decrementAndGet();
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
new file mode 100644
index 0000000..022d02d
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import java.io.Serializable;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+
+/**
+ * <p>
+ * Implementations represent a repository of information about users and their associated {@link Preference}s
+ * for items. Every implementation is also {@link Refreshable} and {@link Serializable}.
+ * </p>
+ */
+public interface DataModel extends Refreshable, Serializable {
+  
+  /**
+   * @return all user IDs in the model, in order
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  LongPrimitiveIterator getUserIDs() throws TasteException;
+  
+  /**
+   * @param userID
+   *          ID of user to get prefs for
+   * @return user's preferences, ordered by item ID
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *           if the user does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  PreferenceArray getPreferencesFromUser(long userID) throws TasteException;
+  
+  /**
+   * @param userID
+   *          ID of user to get prefs for
+   * @return IDs of items user expresses a preference for
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *           if the user does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  FastIDSet getItemIDsFromUser(long userID) throws TasteException;
+  
+  /**
+   * @return a {@link LongPrimitiveIterator} of all item IDs in the model, in order
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  LongPrimitiveIterator getItemIDs() throws TasteException;
+  
+  /**
+   * @param itemID
+   *          item ID
+   * @return all existing {@link Preference}s expressed for that item, ordered by user ID, as an array
+   * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+   *           if the item does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  PreferenceArray getPreferencesForItem(long itemID) throws TasteException;
+  
+  /**
+   * Retrieves the preference value for a single user and item.
+   * 
+   * @param userID
+   *          user ID to get pref value from
+   * @param itemID
+   *          item ID to get pref value for
+   * @return preference value from the given user for the given item, or null if none exists
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *           if the user does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  Float getPreferenceValue(long userID, long itemID) throws TasteException;
+
+  /**
+   * Retrieves the time at which a preference value from a user and item was set, if known.
+   * Time is expressed in the usual way, as a number of milliseconds since the epoch.
+   *
+   * @param userID user ID for preference in question
+   * @param itemID item ID for preference in question
+   * @return time at which preference was set, or null if no preference exists or its time is not known
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if the user does not exist
+   * @throws TasteException if an error occurs while accessing the data
+   */
+  Long getPreferenceTime(long userID, long itemID) throws TasteException;
+  
+  /**
+   * @return total number of items known to the model. This is generally the union of all items preferred by
+   *         at least one user but could include more.
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  int getNumItems() throws TasteException;
+  
+  /**
+   * @return total number of users known to the model.
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  int getNumUsers() throws TasteException;
+  
+  /**
+   * @param itemID item ID to check for
+   * @return the number of users who have expressed a preference for the item
+   * @throws TasteException if an error occurs while accessing the data
+   */
+  int getNumUsersWithPreferenceFor(long itemID) throws TasteException;
+
+  /**
+   * @param itemID1 first item ID to check for
+   * @param itemID2 second item ID to check for
+   * @return the number of users who have expressed a preference for the given items. NOTE(review): presumably
+   *         this counts users with a preference for both items -- confirm against the implementations
+   * @throws TasteException if an error occurs while accessing the data
+   */
+  int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException;
+  
+  /**
+   * <p>
+   * Sets a particular preference (item plus rating) for a user.
+   * </p>
+   * 
+   * @param userID
+   *          user to set preference for
+   * @param itemID
+   *          item to set preference for
+   * @param value
+   *          preference value
+   * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+   *           if the item does not exist
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *           if the user does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  void setPreference(long userID, long itemID, float value) throws TasteException;
+  
+  /**
+   * <p>
+   * Removes a particular preference for a user.
+   * </p>
+   * 
+   * @param userID
+   *          user from which to remove preference
+   * @param itemID
+   *          item to remove preference for
+   * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+   *           if the item does not exist
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *           if the user does not exist
+   * @throws TasteException
+   *           if an error occurs while accessing the data
+   */
+  void removePreference(long userID, long itemID) throws TasteException;
+
+  /**
+   * @return true if this implementation actually stores and returns distinct preference values;
+   *  that is, if it is not a 'boolean' DataModel
+   */
+  boolean hasPreferenceValues();
+
+  /**
+   * @return the maximum preference value that is possible in the current problem domain being evaluated. For
+   * example, if the domain is movie ratings on a scale of 1 to 5, this should be 5. While a
+   * {@link org.apache.mahout.cf.taste.recommender.Recommender} may estimate a preference value above 5.0, it
+   * isn't "fair" to consider that the system is actually suggesting an impossible rating of, say, 5.4 stars.
+   * In practice the application would cap this estimate to 5.0. Since evaluators evaluate
+   * the difference between estimated and actual value, capping at least prevents such estimates from unfairly
+   * penalizing a {@link org.apache.mahout.cf.taste.recommender.Recommender}
+   */
+  float getMaxPreference();
+
+  /**
+   * @return presumably the minimum preference value that is possible in the current problem domain, i.e. the
+   * lower-bound counterpart of {@link #getMaxPreference()} -- this declaration itself only refers to that method
+   * @see #getMaxPreference()
+   */
+  float getMinPreference();
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
new file mode 100644
index 0000000..cc477fe
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>
+ * Mahout 0.2 changed the framework to operate only in terms of numeric (long) ID values for users and items.
+ * This is, obviously, not compatible with applications that used other key types -- most commonly
+ * {@link String}. Implementations of this interface provide support for mapping Strings to longs and vice versa
+ * in order to provide a smoother migration path to applications that must still use strings as IDs.
+ * </p>
+ * 
+ * <p>
+ * The mapping from strings to 64-bit numeric values is fixed here, to provide a standard implementation that
+ * is 'portable' or reproducible outside the framework easily. See {@link #toLongID(String)}.
+ * </p>
+ * 
+ * <p>
+ * Because this mapping is deterministically computable, it does not need to be stored. Indeed, the job of
+ * implementations is to store the reverse mapping. There are an infinite number of strings but only a fixed
+ * number of longs, so it is possible for two strings to map to the same value. Implementations do not treat
+ * this as an error but rather retain only the most recent mapping, overwriting a previous mapping. The
+ * probability of collision in a 64-bit space is quite small, but not zero. However, in the context of a
+ * collaborative filtering problem, the consequence of a collision is small, at worst -- perhaps one user
+ * receives another user's recommendations.
+ * </p>
+ * 
+ * @since 0.2
+ */
+public interface IDMigrator extends Refreshable {
+  
+  /**
+   * @param stringID
+   *          string ID to map to a long
+   * @return the top 8 bytes of the MD5 hash of the bytes of the given {@link String}'s UTF-8 encoding as a
+   *         long.
+   */
+  long toLongID(String stringID);
+  
+  /**
+   * @param longID
+   *          long ID to look up
+   * @return the string ID most recently associated with the given long ID, or null if no such mapping exists
+   * @throws TasteException
+   *           if an error occurs while retrieving the mapping
+   */
+  String toStringID(long longID) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
new file mode 100644
index 0000000..e91ed48
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import javax.sql.DataSource;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+
+public interface JDBCDataModel extends DataModel {
+  
+  /**
+   * @return {@link DataSource} underlying this model
+   */
+  DataSource getDataSource();
+  
+  /**
+   * Exports the preference data of this model.
+   *
+   * NOTE(review): it is an open question whether this method should exist elsewhere. It seems most relevant
+   * for a DB implementation, which is not in memory, and which might want to export its data to memory.
+   * 
+   * @return all user preference data
+   */
+  FastByIDMap<PreferenceArray> exportWithPrefs() throws TasteException;
+  
+  FastByIDMap<FastIDSet> exportWithIDsOnly() throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
new file mode 100644
index 0000000..fe0150a
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+/**
+ * <p>
+ * A {@link Preference} encapsulates an item and a preference value, which indicates the strength of the
+ * preference for it. {@link Preference}s are associated with users.
+ * </p>
+ */
+public interface Preference {
+  
+  /** @return ID of user who prefers the item */
+  long getUserID();
+  
+  /** @return item ID that is preferred */
+  long getItemID();
+  
+  /**
+   * @return strength of the preference for that item. Zero should indicate "no preference either way";
+   *         positive values indicate preference and negative values indicate dislike
+   */
+  float getValue();
+  
+  /**
+   * Sets the strength of the preference for this item.
+   * 
+   * @param value
+   *          new preference value
+   */
+  void setValue(float value);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
new file mode 100644
index 0000000..3886bc6
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import java.io.Serializable;
+
+/**
+ * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a
+ * more memory-efficient representation.
+ */
+public interface PreferenceArray extends Cloneable, Serializable, Iterable<Preference> {
+  
+  /**
+   * @return size or length of the "array"
+   */
+  int length();
+  
+  /**
+   * @param i
+   *          index
+   * @return a materialized {@link Preference} representation of the preference at i
+   */
+  Preference get(int i);
+  
+  /**
+   * Sets preference at i from information in the given {@link Preference}
+   * 
+   * @param i
+   *          index
+   * @param pref
+   *          preference whose information is stored at i
+   */
+  void set(int i, Preference pref);
+  
+  /**
+   * @param i
+   *          index
+   * @return user ID from preference at i
+   */
+  long getUserID(int i);
+  
+  /**
+   * Sets user ID for preference at i.
+   * 
+   * @param i
+   *          index
+   * @param userID
+   *          new user ID
+   */
+  void setUserID(int i, long userID);
+  
+  /**
+   * @param i
+   *          index
+   * @return item ID from preference at i
+   */
+  long getItemID(int i);
+  
+  /**
+   * Sets item ID for preference at i.
+   * 
+   * @param i
+   *          index
+   * @param itemID
+   *          new item ID
+   */
+  void setItemID(int i, long itemID);
+
+  /**
+   * @return all user or item IDs
+   */
+  long[] getIDs();
+  
+  /**
+   * @param i
+   *          index
+   * @return preference value from preference at i
+   */
+  float getValue(int i);
+  
+  /**
+   * Sets preference value for preference at i.
+   * 
+   * @param i
+   *          index
+   * @param value
+   *          new preference value
+   */
+  void setValue(int i, float value);
+  
+  /**
+   * @return independent copy of this object
+   */
+  PreferenceArray clone();
+  
+  /**
+   * Sorts underlying array by user ID, ascending.
+   */
+  void sortByUser();
+  
+  /**
+   * Sorts underlying array by item ID, ascending.
+   */
+  void sortByItem();
+  
+  /**
+   * Sorts underlying array by preference value, ascending.
+   */
+  void sortByValue();
+  
+  /**
+   * Sorts underlying array by preference value, descending.
+   */
+  void sortByValueReversed();
+  
+  /**
+   * @param userID
+   *          user ID
+   * @return true if array contains a preference with given user ID
+   */
+  boolean hasPrefWithUserID(long userID);
+  
+  /**
+   * @param itemID
+   *          item ID
+   * @return true if array contains a preference with given item ID
+   */
+  boolean hasPrefWithItemID(long itemID);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
new file mode 100644
index 0000000..ff29a34
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+public interface UpdatableIDMigrator extends IDMigrator {
+  
+  /**
+   * Stores the reverse long-to-String mapping in some kind of backing store. Note that this must be called
+   * directly (or indirectly through {@link #initialize(Iterable)}) for every String that might be encountered
+   * in the application, or else the mapping will not be known.
+   *
+   * @param longID
+   *          long ID
+   * @param stringID
+   *          string ID that maps to/from that long ID
+   * @throws TasteException
+   *           if an error occurs while saving the mapping
+   */
+  void storeMapping(long longID, String stringID) throws TasteException;
+
+  /**
+   * Make the mapping aware of the given string IDs. This must be called initially before the implementation
+   * is used, or else it will not be aware of reverse long-to-String mappings.
+   *
+   * @param stringIDs
+   *          string IDs to make the mapping aware of
+   * @throws TasteException
+   *           if an error occurs while storing the mappings
+   */
+  void initialize(Iterable<String> stringIDs) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
new file mode 100644
index 0000000..2a143e1
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.neighborhood;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>
+ * Implementations of this interface compute a "neighborhood" of users like a given user. This neighborhood
+ * can then be used to compute recommendations.
+ * </p>
+ */
+public interface UserNeighborhood extends Refreshable {
+  
+  /**
+   * @param userID
+   *          ID of user for which a neighborhood will be computed
+   * @return IDs of users in the neighborhood
+   * @throws TasteException
+   *           if an error occurs while accessing data
+   */
+  long[] getUserNeighborhood(long userID) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
new file mode 100644
index 0000000..ada1949
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * Used to retrieve all items that could possibly be recommended to the user.
+ */
+public interface CandidateItemsStrategy extends Refreshable {
+
+  /**
+   * @return IDs of all items that could be recommended to the user identified by {@code userID}
+   */
+  FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel,
+     boolean includeKnownItems) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
new file mode 100644
index 0000000..d9a9cf7
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+/**
+ * <p>
+ * A counterpart of {@link Rescorer} which operates on {@code long} primitive IDs, rather than arbitrary
+ * {@link Object}s. This is provided since most uses of this interface in the framework take IDs (as
+ * {@code long}) as an argument, and so this can be used to avoid unnecessary boxing/unboxing.
+ * </p>
+ */
+public interface IDRescorer {
+  
+  /**
+   * @param id
+   *          ID of thing (user, item, etc.) to rescore
+   * @param originalScore
+   *          original score
+   * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely
+   */
+  double rescore(long id, double originalScore);
+  
+  /**
+   * Returns {@code true} to exclude the given thing.
+   *
+   * @param id
+   *          ID of thing (user, item, etc.) to check for exclusion
+   * @return {@code true} to exclude, {@code false} otherwise
+   */
+  boolean isFiltered(long id);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
new file mode 100644
index 0000000..570f851
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.common.LongPair;
+
+/**
+ * <p>
+ * Interface implemented by "item-based" recommenders.
+ * </p>
+ */
+public interface ItemBasedRecommender extends Recommender {
+  
+  /**
+   * @param itemID
+   *          ID of item for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @return items most similar to the given item, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException;
+  
+  /**
+   * @param itemID
+   *          ID of item for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @param rescorer
+   *          {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+   *          items
+   * @return items most similar to the given item, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long itemID, int howMany, Rescorer<LongPair> rescorer) throws TasteException;
+  
+  /**
+   * @param itemIDs
+   *          IDs of items for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @return items most similar to the given items, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException;
+  
+  /**
+   * @param itemIDs
+   *          IDs of items for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @param rescorer
+   *          {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+   *          items
+   * @return items most similar to the given items, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+                                         int howMany,
+                                         Rescorer<LongPair> rescorer) throws TasteException;
+
+  /**
+   * @param itemIDs
+   *          IDs of items for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @param excludeItemIfNotSimilarToAll
+   *          exclude an item if it is not similar to each of the input items
+   * @return items most similar to the given items, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+                                         int howMany,
+                                         boolean excludeItemIfNotSimilarToAll) throws TasteException;
+
+  /**
+   * @param itemIDs
+   *          IDs of items for which to find most similar other items
+   * @param howMany
+   *          desired number of most similar items to find
+   * @param rescorer
+   *          {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+   *          items
+   * @param excludeItemIfNotSimilarToAll
+   *          exclude an item if it is not similar to each of the input items
+   * @return items most similar to the given items, ordered from most similar to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+                                         int howMany,
+                                         Rescorer<LongPair> rescorer,
+                                         boolean excludeItemIfNotSimilarToAll) throws TasteException;
+
+  /**
+   * <p>
+   * Lists the items that were most influential in recommending a given item to a given user. Exactly how this
+   * is determined is left to the implementation, but generally this will return items that the user prefers
+   * and that are similar to the given item.
+   * </p>
+   * 
+   * <p>
+   * This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning
+   * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it
+   * encapsulates an item and value. The value here does not necessarily have a consistent interpretation or
+   * expected range; it will be higher the more influential the item was in the recommendation.
+   * </p>
+   * 
+   * @param userID
+   *          ID of user who was recommended the item
+   * @param itemID
+   *          ID of item that was recommended
+   * @param howMany
+   *          maximum number of items to return
+   * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given
+   *         item to least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+   */
+  List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..282ceff
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Used to retrieve all items that could possibly be similar to a given set of items.
+ */
+public interface MostSimilarItemsCandidateItemsStrategy extends Refreshable {
+
+  FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws TasteException;
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
new file mode 100644
index 0000000..1fcece8
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+/**
+ * <p>
+ * Implementations encapsulate a single recommended item: the ID of the item and a value expressing the
+ * strength of the preference for it.
+ * </p>
+ */
+public interface RecommendedItem {
+  
+  /** @return the recommended item ID */
+  long getItemID();
+  
+  /**
+   * <p>
+   * A value expressing the strength of the preference for the recommended item. The range of the values
+   * depends on the implementation. Implementations must use larger values to express stronger preference.
+   * </p>
+   * 
+   * @return strength of the preference
+   */
+  float getValue();
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
new file mode 100644
index 0000000..4135aff
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>
+ * Implementations of this interface can recommend items for a user. Implementations will likely take
+ * advantage of several classes in other packages here to compute this.
+ * </p>
+ */
+public interface Recommender extends Refreshable {
+  
+  /**
+   * @param userID
+   *          user for which recommendations are to be computed
+   * @param howMany
+   *          desired number of recommendations
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+   *         least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  List<RecommendedItem> recommend(long userID, int howMany) throws TasteException;
+
+  /**
+   * @param userID
+   *          user for which recommendations are to be computed
+   * @param howMany
+   *          desired number of recommendations
+   * @param includeKnownItems
+   *          whether to include items already known by the user in recommendations
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+   *         least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException;
+
+  /**
+   * @param userID
+   *          user for which recommendations are to be computed
+   * @param howMany
+   *          desired number of recommendations
+   * @param rescorer
+   *          rescoring function to apply before final list of recommendations is determined
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+   *         least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException;
+  
+  /**
+   * @param userID
+   *          user for which recommendations are to be computed
+   * @param howMany
+   *          desired number of recommendations
+   * @param rescorer
+   *          rescoring function to apply before final list of recommendations is determined
+   * @param includeKnownItems
+   *          whether to include items already known by the user in recommendations
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+   *         least
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  
+  List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+      throws TasteException;
+  
+  /**
+   * @param userID
+   *          user ID whose preference is to be estimated
+   * @param itemID
+   *          item ID to estimate preference for
+   * @return an estimated preference if the user has not expressed a preference for the item, or else the
+   *         user's actual preference for the item. If a preference cannot be estimated, returns
+   *         {@link Float#NaN}
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  float estimatePreference(long userID, long itemID) throws TasteException;
+  
+  /**
+   * @param userID
+   *          user to set preference for
+   * @param itemID
+   *          item to set preference for
+   * @param value
+   *          preference value
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  void setPreference(long userID, long itemID, float value) throws TasteException;
+  
+  /**
+   * @param userID
+   *          user from which to remove preference
+   * @param itemID
+   *          item for which to remove preference
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  void removePreference(long userID, long itemID) throws TasteException;
+
+  /**
+   * @return underlying {@link DataModel} used by this {@link Recommender} implementation
+   */
+  DataModel getDataModel();
+
+}