You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/28 14:54:45 UTC
[17/51] [partial] mahout git commit: NO-JIRA Clean up MR refactor
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
new file mode 100644
index 0000000..8ea1660
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.common.Weighting;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An implementation of the Pearson correlation. For users X and Y, the following values are calculated:
+ * </p>
+ *
+ * <ul>
+ * <li>sumX2: sum of the square of all X's preference values</li>
+ * <li>sumY2: sum of the square of all Y's preference values</li>
+ * <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and Y express a
+ * preference</li>
+ * </ul>
+ *
+ * <p>
+ * The correlation is then:
+ *
+ * <p>
+ * {@code sumXY / sqrt(sumX2 * sumY2)}
+ * </p>
+ *
+ * <p>
+ * Note that this correlation "centers" its data, shifts the user's preference values so that each of their
+ * means is 0. This is necessary to achieve expected behavior on all data sets.
+ * </p>
+ *
+ * <p>
+ * This correlation implementation is equivalent to the cosine similarity since the data it receives
+ * is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle
+ * between the two vectors defined by the users' preference values.
+ * </p>
+ *
+ * <p>
+ * For cosine similarity on uncentered data, see {@link UncenteredCosineSimilarity}.
+ * </p>
+ */
+public final class PearsonCorrelationSimilarity extends AbstractSimilarity {
+
+ /**
+ * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+ */
+ public PearsonCorrelationSimilarity(DataModel dataModel) throws TasteException {
+ this(dataModel, Weighting.UNWEIGHTED);
+ }
+
+ /**
+ * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+ */
+ public PearsonCorrelationSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
+ super(dataModel, weighting, true);
+ Preconditions.checkArgument(dataModel.hasPreferenceValues(), "DataModel doesn't have preference values");
+ }
+
+ @Override
+ double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
+ if (n == 0) {
+ return Double.NaN;
+ }
+ // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
+ // the data is assumed to be centered.
+ double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2);
+ if (denominator == 0.0) {
+ // One or both parties has -all- the same ratings;
+ // can't really say much similarity under this measure
+ return Double.NaN;
+ }
+ return sumXY / denominator;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
new file mode 100644
index 0000000..1116368
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of
+ * preference values themselves. That is, each user's preferences are sorted and then assigned a rank as their
+ * preference value, with 1 being assigned to the least preferred item.
+ * </p>
+ */
+public final class SpearmanCorrelationSimilarity implements UserSimilarity {
+
+  private final DataModel dataModel;
+
+  /**
+   * @param dataModel model supplying each user's preferences; must not be null
+   * @throws NullPointerException if {@code dataModel} is null
+   */
+  public SpearmanCorrelationSimilarity(DataModel dataModel) {
+    this.dataModel = Preconditions.checkNotNull(dataModel);
+  }
+
+  /**
+   * Computes the rank correlation between two users over the items they have both rated.
+   *
+   * @param userID1 first user
+   * @param userID2 second user
+   * @return {@code 1 - 6 * sum(d^2) / (n * (n^2 - 1))}, where {@code d} is the per-item rank difference
+   *  and {@code n} the number of common items; {@link Double#NaN} if either user has at most one
+   *  preference or the users share at most one item
+   * @throws TasteException if the data model fails to supply the users' preferences
+   */
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws TasteException {
+    PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1);
+    PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2);
+    int xLength = xPrefs.length();
+    int yLength = yPrefs.length();
+
+    if (xLength <= 1 || yLength <= 1) {
+      return Double.NaN;
+    }
+
+    // Copy prefs since we need to modify pref values to ranks
+    xPrefs = xPrefs.clone();
+    yPrefs = yPrefs.clone();
+
+    // First sort by values from low to high
+    xPrefs.sortByValue();
+    yPrefs.sortByValue();
+
+    // Assign ranks from low to high
+    float nextRank = 1.0f;
+    for (int i = 0; i < xLength; i++) {
+      // ... but only for items that are common to both pref arrays
+      if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) {
+        xPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
+      // Other values are bogus but don't matter
+    }
+    nextRank = 1.0f;
+    for (int i = 0; i < yLength; i++) {
+      if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) {
+        yPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
+    }
+
+    // Re-sort by item ID so the two arrays can be walked in parallel below
+    xPrefs.sortByItem();
+    yPrefs.sortByItem();
+
+    long xIndex = xPrefs.getItemID(0);
+    long yIndex = yPrefs.getItemID(0);
+    int xPrefIndex = 0;
+    int yPrefIndex = 0;
+
+    double sumXYRankDiff2 = 0.0;
+    int count = 0;
+
+    // Merge-style walk: advance whichever side holds the smaller item ID, and both on a match
+    while (true) {
+      int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
+      if (compare == 0) {
+        double diff = xPrefs.getValue(xPrefIndex) - yPrefs.getValue(yPrefIndex);
+        sumXYRankDiff2 += diff * diff;
+        count++;
+      }
+      if (compare <= 0) {
+        if (++xPrefIndex >= xLength) {
+          break;
+        }
+        xIndex = xPrefs.getItemID(xPrefIndex);
+      }
+      if (compare >= 0) {
+        if (++yPrefIndex >= yLength) {
+          break;
+        }
+        yIndex = yPrefs.getItemID(yPrefIndex);
+      }
+    }
+
+    if (count <= 1) {
+      return Double.NaN;
+    }
+
+    // When ranks are unique, this formula actually gives the Pearson correlation.
+    // The denominator is evaluated in double: as int arithmetic, count^3 wraps around
+    // once count exceeds 1290 and would silently corrupt the result.
+    double n = count;
+    return 1.0 - 6.0 * sumXYRankDiff2 / (n * (n * n - 1.0));
+  }
+
+  /**
+   * Not supported by this implementation.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Passes {@code alreadyRefreshed} through {@link RefreshHelper#buildRefreshed(Collection)} and then
+   * hands the result, together with the data model, to {@code RefreshHelper.maybeRefresh}.
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
new file mode 100644
index 0000000..0c3a0a4
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * <p>
+ * An implementation of a "similarity" based on the <a
+ * href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">
+ * Tanimoto coefficient</a>, or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard
+ * coefficient</a>.
+ * </p>
+ *
+ * <p>
+ * This is intended for "binary" data sets where a user either expresses a generic "yes" preference for an
+ * item or has no preference. The actual preference values do not matter here, only their presence or absence.
+ * </p>
+ *
+ * <p>
+ * The value returned is in [0,1].
+ * </p>
+ */
+public final class TanimotoCoefficientSimilarity extends AbstractItemSimilarity implements UserSimilarity {
+
+ public TanimotoCoefficientSimilarity(DataModel dataModel) {
+ super(dataModel);
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public double userSimilarity(long userID1, long userID2) throws TasteException {
+
+ DataModel dataModel = getDataModel();
+ FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1);
+ FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2);
+
+ int xPrefsSize = xPrefs.size();
+ int yPrefsSize = yPrefs.size();
+ if (xPrefsSize == 0 && yPrefsSize == 0) {
+ return Double.NaN;
+ }
+ if (xPrefsSize == 0 || yPrefsSize == 0) {
+ return 0.0;
+ }
+
+ int intersectionSize =
+ xPrefsSize < yPrefsSize ? yPrefs.intersectionSize(xPrefs) : xPrefs.intersectionSize(yPrefs);
+ if (intersectionSize == 0) {
+ return Double.NaN;
+ }
+
+ int unionSize = xPrefsSize + yPrefsSize - intersectionSize;
+
+ return (double) intersectionSize / (double) unionSize;
+ }
+
+ @Override
+ public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+ int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1);
+ return doItemSimilarity(itemID1, itemID2, preferring1);
+ }
+
+ @Override
+ public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+ int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1);
+ int length = itemID2s.length;
+ double[] result = new double[length];
+ for (int i = 0; i < length; i++) {
+ result[i] = doItemSimilarity(itemID1, itemID2s[i], preferring1);
+ }
+ return result;
+ }
+
+ private double doItemSimilarity(long itemID1, long itemID2, int preferring1) throws TasteException {
+ DataModel dataModel = getDataModel();
+ int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2);
+ if (preferring1and2 == 0) {
+ return Double.NaN;
+ }
+ int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
+ return (double) preferring1and2 / (double) (preferring1 + preferring2 - preferring1and2);
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+ RefreshHelper.maybeRefresh(alreadyRefreshed, getDataModel());
+ }
+
+ @Override
+ public String toString() {
+ return "TanimotoCoefficientSimilarity[dataModel:" + getDataModel() + ']';
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
new file mode 100644
index 0000000..6260606
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.common.Weighting;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An implementation of the cosine similarity. The result is the cosine of the angle formed between
+ * the two preference vectors.
+ * </p>
+ *
+ * <p>
+ * Note that this similarity does not "center" its data, shifts the user's preference values so that each of their
+ * means is 0. For this behavior, use {@link PearsonCorrelationSimilarity}, which actually is mathematically
+ * equivalent for centered data.
+ * </p>
+ */
+public final class UncenteredCosineSimilarity extends AbstractSimilarity {
+
+ /**
+ * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+ */
+ public UncenteredCosineSimilarity(DataModel dataModel) throws TasteException {
+ this(dataModel, Weighting.UNWEIGHTED);
+ }
+
+ /**
+ * @throws IllegalArgumentException if {@link DataModel} does not have preference values
+ */
+ public UncenteredCosineSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
+ super(dataModel, weighting, false);
+ Preconditions.checkArgument(dataModel.hasPreferenceValues(), "DataModel doesn't have preference values");
+ }
+
+ @Override
+ double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
+ if (n == 0) {
+ return Double.NaN;
+ }
+ double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2);
+ if (denominator == 0.0) {
+ // One or both parties has -all- the same ratings;
+ // can't really say much similarity under this measure
+ return Double.NaN;
+ }
+ return sumXY / denominator;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
new file mode 100644
index 0000000..1ae45c2
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * {@link Iterable} that reads a file line by line so that its contents can be fed into a
+ * {@link GenericItemSimilarity}.
+ */
+final class FileItemItemSimilarityIterable implements Iterable<GenericItemSimilarity.ItemItemSimilarity> {
+
+  private final File source;
+
+  /**
+   * @param similaritiesFile file to read; it is only stored here and not opened until
+   *  {@link #iterator()} is called
+   */
+  FileItemItemSimilarityIterable(File similaritiesFile) {
+    source = similaritiesFile;
+  }
+
+  /**
+   * Creates a new {@link FileItemItemSimilarityIterator} over the file on every call.
+   *
+   * @throws IllegalStateException wrapping the {@link IOException} if the iterator cannot be created
+   */
+  @Override
+  public Iterator<GenericItemSimilarity.ItemItemSimilarity> iterator() {
+    Iterator<GenericItemSimilarity.ItemItemSimilarity> similarities;
+    try {
+      similarities = new FileItemItemSimilarityIterator(source);
+    } catch (IOException ioe) {
+      // Iterable.iterator() cannot declare a checked exception, so rethrow unchecked with the cause attached
+      throw new IllegalStateException("Can't read " + source, ioe);
+    }
+    return similarities;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
new file mode 100644
index 0000000..c071159
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ForwardingIterator;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+import org.apache.mahout.common.iterator.FileLineIterator;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.regex.Pattern;
+
+/**
+ * A simple iterator that uses a {@link FileLineIterator} internally, parsing each
+ * line into a {@link GenericItemSimilarity.ItemItemSimilarity}.
+ */
+final class FileItemItemSimilarityIterator extends ForwardingIterator<GenericItemSimilarity.ItemItemSimilarity> {
+
+  /** Fields on a line are delimited by a single comma or a single tab. */
+  private static final Pattern SEPARATOR = Pattern.compile("[,\t]");
+
+  private final Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate;
+
+  /**
+   * @param similaritiesFile file whose lines are parsed
+   * @throws IOException if the underlying {@link FileLineIterator} cannot be created
+   */
+  FileItemItemSimilarityIterator(File similaritiesFile) throws IOException {
+    delegate = Iterators.transform(new FileLineIterator(similaritiesFile), new LineParser());
+  }
+
+  @Override
+  protected Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate() {
+    return delegate;
+  }
+
+  /**
+   * Converts one line of text into a similarity entry. The first two fields are parsed as {@code long}
+   * item IDs and the third as a {@code double} value; any further fields are ignored. A line with fewer
+   * than three fields, or with unparseable numbers, makes {@link #apply(String)} throw.
+   */
+  private static final class LineParser implements Function<String, GenericItemSimilarity.ItemItemSimilarity> {
+    @Override
+    public GenericItemSimilarity.ItemItemSimilarity apply(String from) {
+      String[] fields = SEPARATOR.split(from);
+      long firstItemID = Long.parseLong(fields[0]);
+      long secondItemID = Long.parseLong(fields[1]);
+      double value = Double.parseDouble(fields[2]);
+      return new GenericItemSimilarity.ItemItemSimilarity(firstItemID, secondItemID, value);
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
new file mode 100644
index 0000000..712b96a
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.file;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * An {@link ItemSimilarity} backed by a comma-delimited file. This class typically expects a file where each line
+ * contains an item ID, followed by another item ID, followed by a similarity value, separated by commas. You may also
+ * use tabs.
+ * </p>
+ *
+ * <p>
+ * The similarity value is assumed to be parseable as a {@code double} having a value between -1 and 1. The
+ * item IDs are parsed as {@code long}s. Similarities are symmetric so for a pair of items you do not have to
+ * include 2 lines in the file.
+ * </p>
+ *
+ * <p>
+ * This class reloads data from the data file when {@link #refresh(Collection)} is called and the file's
+ * modification time is more than the configured minimum reload interval later than the one recorded at
+ * the last load.
+ * </p>
+ *
+ * <p>
+ * This class is not intended for use with very large amounts of data. For that, a JDBC-backed {@link ItemSimilarity}
+ * and a database are more appropriate.
+ * </p>
+ */
+public class FileItemSimilarity implements ItemSimilarity {
+
+  private static final Logger log = LoggerFactory.getLogger(FileItemSimilarity.class);
+
+  public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
+
+  private final File dataFile;
+  private final long minReloadIntervalMS;
+  private final ReentrantLock reloadLock;
+  // NOTE(review): both fields below are written while holding reloadLock but read without it -- confirm
+  // that visibility across threads is not required, or that callers synchronize externally
+  private ItemSimilarity delegate;
+  private long lastModified;
+
+  /**
+   * Uses {@link #DEFAULT_MIN_RELOAD_INTERVAL_MS} as the minimum reload interval.
+   *
+   * @param dataFile
+   *          file containing the similarity data
+   */
+  public FileItemSimilarity(File dataFile) {
+    this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS);
+  }
+
+  /**
+   * Validates the file, records its modification time and performs the initial load.
+   *
+   * @param dataFile
+   *          file containing the similarity data; must exist and must not be a directory
+   * @param minReloadIntervalMS
+   *          the minimum interval in milliseconds after which a full reload of the original datafile is done
+   *          when refresh() is called
+   * @throws IllegalArgumentException if {@code dataFile} is null, missing or a directory
+   * @see #FileItemSimilarity(File)
+   */
+  public FileItemSimilarity(File dataFile, long minReloadIntervalMS) {
+    Preconditions.checkArgument(dataFile != null, "dataFile is null");
+    boolean regularAndPresent = dataFile.exists() && !dataFile.isDirectory();
+    Preconditions.checkArgument(regularAndPresent, "dataFile is missing or a directory: %s", dataFile);
+
+    log.info("Creating FileItemSimilarity for file {}", dataFile);
+
+    this.dataFile = dataFile.getAbsoluteFile();
+    this.lastModified = dataFile.lastModified();
+    this.minReloadIntervalMS = minReloadIntervalMS;
+    this.reloadLock = new ReentrantLock();
+
+    // NOTE(review): reload() is protected and this class is not final, so a subclass override would run
+    // before the subclass constructor body
+    reload();
+  }
+
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    return delegate.itemSimilarities(itemID1, itemID2s);
+  }
+
+  @Override
+  public long[] allSimilarItemIDs(long itemID) throws TasteException {
+    return delegate.allSimilarItemIDs(itemID);
+  }
+
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    return delegate.itemSimilarity(itemID1, itemID2);
+  }
+
+  /**
+   * Reloads the file if its modification time exceeds the recorded one by more than the minimum reload
+   * interval. The {@code alreadyRefreshed} argument is not consulted.
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    long onDisk = dataFile.lastModified();
+    boolean changedEnough = onDisk > lastModified + minReloadIntervalMS;
+    if (changedEnough) {
+      log.debug("File has changed; reloading...");
+      reload();
+    }
+  }
+
+  /**
+   * Rebuilds the delegate from the data file. If the reload lock is currently held elsewhere, the call
+   * returns immediately without reloading.
+   */
+  protected void reload() {
+    if (!reloadLock.tryLock()) {
+      return;
+    }
+    try {
+      // Read the timestamp before parsing and publish it only after the new delegate has been built,
+      // so a failed load leaves the recorded time untouched
+      long stampBeforeLoad = dataFile.lastModified();
+      delegate = new GenericItemSimilarity(new FileItemItemSimilarityIterable(dataFile));
+      lastModified = stampBeforeLoad;
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "FileItemSimilarity[dataFile:" + dataFile + ']';
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
new file mode 100644
index 0000000..631ec9b
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+
+/**
+ * Persists precomputed item similarities to a file that can later be used
+ * by a {@link org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity}.
+ */
+public class FileSimilarItemsWriter implements SimilarItemsWriter {
+
+  private final File file;
+  /** Created by {@link #open()}; unset until then. */
+  private BufferedWriter writer;
+
+  /**
+   * @param file destination file; it is not opened until {@link #open()} is called
+   */
+  public FileSimilarItemsWriter(File file) {
+    this.file = file;
+  }
+
+  /**
+   * Opens the destination file for writing, encoding the output as UTF-8.
+   *
+   * @throws IOException if the file cannot be opened
+   */
+  @Override
+  public void open() throws IOException {
+    FileOutputStream rawOut = new FileOutputStream(file);
+    writer = new BufferedWriter(new OutputStreamWriter(rawOut, Charsets.UTF_8));
+  }
+
+  /**
+   * Writes one line per similar item in the form {@code itemID,similarItemID,similarity}.
+   *
+   * @param similarItems source item together with its similar items
+   * @throws IOException if writing fails
+   */
+  @Override
+  public void add(SimilarItems similarItems) throws IOException {
+    String sourceItemID = String.valueOf(similarItems.getItemID());
+    for (SimilarItem neighbor : similarItems.getSimilarItems()) {
+      writer.append(sourceItemID).append(',')
+          .append(String.valueOf(neighbor.getItemID())).append(',')
+          .append(String.valueOf(neighbor.getSimilarity()));
+      writer.newLine();
+    }
+  }
+
+  /**
+   * Closes the underlying writer; an {@link IOException} raised while closing is propagated.
+   */
+  @Override
+  public void close() throws IOException {
+    Closeables.close(writer, false);
+  }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
new file mode 100644
index 0000000..b7b52cf
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Precompute item similarities in parallel on a single machine. The recommender given to this class must use a
+ * DataModel that holds the interactions in memory (such as
+ * {@link org.apache.mahout.cf.taste.impl.model.GenericDataModel} or
+ * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}) as fast random access to the data is required
+ */
+public class MultithreadedBatchItemSimilarities extends BatchItemSimilarities {
+
+  private static final int DEFAULT_BATCH_SIZE = 100;
+
+  private static final Logger log = LoggerFactory.getLogger(MultithreadedBatchItemSimilarities.class);
+
+  /** Number of item IDs handed to a worker thread at once. */
+  private final int batchSize;
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) {
+    this(recommender, similarItemsPerItem, DEFAULT_BATCH_SIZE);
+  }
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   * @param batchSize size of item batches sent to worker threads
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem, int batchSize) {
+    super(recommender, similarItemsPerItem);
+    this.batchSize = batchSize;
+  }
+
+  /**
+   * Computes the similar items of every item in the recommender's data model using
+   * {@code degreeOfParallelism} worker threads and one additional thread that feeds the results to
+   * {@code writer}. The writer is opened at the start and is always closed before this method returns or
+   * throws.
+   *
+   * @param degreeOfParallelism number of worker threads
+   * @param maxDurationInHours maximum time to wait for the computation to finish
+   * @param writer destination of the computed similarities
+   * @return number of similarities handed to the writer
+   * @throws IOException if setting up the computation fails or the writer cannot be closed
+   * @throws RuntimeException if the computation does not finish in time or the calling thread is interrupted
+   */
+  @Override
+  public int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, SimilarItemsWriter writer)
+    throws IOException {
+
+    // one thread per worker plus one for the output task
+    ExecutorService executorService = Executors.newFixedThreadPool(degreeOfParallelism + 1);
+
+    Output output = null;
+    try {
+      writer.open();
+
+      DataModel dataModel = getRecommender().getDataModel();
+
+      BlockingQueue<long[]> itemsIDsInBatches = queueItemIDsInBatches(dataModel, batchSize, degreeOfParallelism);
+      BlockingQueue<List<SimilarItems>> results = new LinkedBlockingQueue<>();
+
+      AtomicInteger numActiveWorkers = new AtomicInteger(degreeOfParallelism);
+      for (int n = 0; n < degreeOfParallelism; n++) {
+        executorService.execute(new SimilarItemsWorker(n, itemsIDsInBatches, results, numActiveWorkers));
+      }
+
+      output = new Output(results, writer, numActiveWorkers);
+      executorService.execute(output);
+
+    } catch (Exception e) {
+      throw new IOException(e);
+    } finally {
+      try {
+        awaitCompletion(executorService, maxDurationInHours);
+      } finally {
+        // close the writer even when waiting failed, otherwise the underlying resource leaks
+        Closeables.close(writer, false);
+      }
+    }
+
+    return output.getNumSimilaritiesProcessed();
+  }
+
+  /**
+   * Shuts the executor down and waits for the submitted tasks to finish. On timeout or interruption the
+   * remaining tasks are cancelled so that no threads are left running behind the caller's back.
+   */
+  private static void awaitCompletion(ExecutorService executorService, int maxDurationInHours) {
+    executorService.shutdown();
+    try {
+      boolean succeeded = executorService.awaitTermination(maxDurationInHours, TimeUnit.HOURS);
+      if (!succeeded) {
+        executorService.shutdownNow();
+        throw new RuntimeException("Unable to complete the computation in " + maxDurationInHours + " hours!");
+      }
+    } catch (InterruptedException e) {
+      executorService.shutdownNow();
+      // restore the interrupt flag so that callers further up can react to the interruption
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Splits all item IDs of the data model into batches of at most {@code batchSize} IDs.
+   *
+   * @return queue holding all batches; the last batch may be shorter than {@code batchSize}
+   * @throws TasteException if the data model cannot be accessed
+   * @throws IllegalStateException if there are fewer batches than worker threads
+   */
+  private static BlockingQueue<long[]> queueItemIDsInBatches(DataModel dataModel, int batchSize,
+      int degreeOfParallelism)
+    throws TasteException {
+
+    LongPrimitiveIterator itemIDs = dataModel.getItemIDs();
+    int numItems = dataModel.getNumItems();
+
+    BlockingQueue<long[]> itemIDBatches = new LinkedBlockingQueue<>((numItems / batchSize) + 1);
+
+    long[] batch = new long[batchSize];
+    int pos = 0;
+    while (itemIDs.hasNext()) {
+      batch[pos] = itemIDs.nextLong();
+      pos++;
+      if (pos == batchSize) {
+        // the buffer is reused for the next batch, so enqueue a copy
+        itemIDBatches.add(batch.clone());
+        pos = 0;
+      }
+    }
+
+    if (pos > 0) {
+      long[] lastBatch = new long[pos];
+      System.arraycopy(batch, 0, lastBatch, 0, pos);
+      itemIDBatches.add(lastBatch);
+    }
+
+    if (itemIDBatches.size() < degreeOfParallelism) {
+      throw new IllegalStateException("Degree of parallelism [" + degreeOfParallelism + "] " +
+          " is larger than number of batches [" + itemIDBatches.size() +"].");
+    }
+
+    log.info("Queued {} items in {} batches", numItems, itemIDBatches.size());
+
+    return itemIDBatches;
+  }
+
+
+  /**
+   * Drains the result queue into the writer until all workers have finished and the queue is empty.
+   */
+  private static class Output implements Runnable {
+
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final SimilarItemsWriter writer;
+    private final AtomicInteger numActiveWorkers;
+    // written by the output thread only; computeItemSimilarities reads it after the executor has terminated
+    private int numSimilaritiesProcessed = 0;
+
+    Output(BlockingQueue<List<SimilarItems>> results, SimilarItemsWriter writer, AtomicInteger numActiveWorkers) {
+      this.results = results;
+      this.writer = writer;
+      this.numActiveWorkers = numActiveWorkers;
+    }
+
+    private int getNumSimilaritiesProcessed() {
+      return numSimilaritiesProcessed;
+    }
+
+    @Override
+    public void run() {
+      while (numActiveWorkers.get() != 0 || !results.isEmpty()) {
+        try {
+          // short timeout so that the loop condition is re-evaluated regularly
+          List<SimilarItems> similarItemsOfABatch = results.poll(10, TimeUnit.MILLISECONDS);
+          if (similarItemsOfABatch != null) {
+            for (SimilarItems similarItems : similarItemsOfABatch) {
+              writer.add(similarItems);
+              numSimilaritiesProcessed += similarItems.numSimilarItems();
+            }
+          }
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Takes batches of item IDs from the shared queue, computes the most similar items for each ID and
+   * publishes the results of each batch to the result queue.
+   */
+  private class SimilarItemsWorker implements Runnable {
+
+    private final int number;
+    private final BlockingQueue<long[]> itemIDBatches;
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final AtomicInteger numActiveWorkers;
+
+    SimilarItemsWorker(int number, BlockingQueue<long[]> itemIDBatches, BlockingQueue<List<SimilarItems>> results,
+        AtomicInteger numActiveWorkers) {
+      this.number = number;
+      this.itemIDBatches = itemIDBatches;
+      this.results = results;
+      this.numActiveWorkers = numActiveWorkers;
+    }
+
+    @Override
+    public void run() {
+
+      int numBatchesProcessed = 0;
+      try {
+        long[] itemIDBatch;
+        // The queue is completely filled before the workers are started, so a non-blocking poll() that
+        // returns null means "no work left". Checking isEmpty() and then calling the blocking take() would
+        // let a worker hang forever when another worker grabs the last batch in between.
+        while ((itemIDBatch = itemIDBatches.poll()) != null) {
+
+          List<SimilarItems> similarItemsOfBatch = new ArrayList<>(itemIDBatch.length);
+          for (long itemID : itemIDBatch) {
+            List<RecommendedItem> similarItems = getRecommender().mostSimilarItems(itemID, getSimilarItemsPerItem());
+            similarItemsOfBatch.add(new SimilarItems(itemID, similarItems));
+          }
+
+          results.offer(similarItemsOfBatch);
+
+          if (++numBatchesProcessed % 5 == 0) {
+            log.info("worker {} processed {} batches", number, numBatchesProcessed);
+          }
+        }
+        log.info("worker {} processed {} batches. done.", number, numBatchesProcessed);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      } finally {
+        // always sign off, even on failure, otherwise the output task waits for this worker until the timeout
+        numActiveWorkers.decrementAndGet();
+      }
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
new file mode 100644
index 0000000..022d02d
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import java.io.Serializable;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+
+/**
+ * <p>
+ * Implementations represent a repository of information about users and their associated {@link Preference}s
+ * for items.
+ * </p>
+ */
+public interface DataModel extends Refreshable, Serializable {
+
+ /**
+ * Returns an iterator over the IDs of all users in the model.
+ *
+ * @return all user IDs in the model, in order
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ LongPrimitiveIterator getUserIDs() throws TasteException;
+
+ /**
+ * Returns all preferences expressed by the given user.
+ *
+ * @param userID
+ * ID of user to get prefs for
+ * @return user's preferences, ordered by item ID
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if the user does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ PreferenceArray getPreferencesFromUser(long userID) throws TasteException;
+
+ /**
+ * Returns the IDs of all items the given user has expressed a preference for.
+ *
+ * @param userID
+ * ID of user to get prefs for
+ * @return IDs of items user expresses a preference for
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if the user does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ FastIDSet getItemIDsFromUser(long userID) throws TasteException;
+
+ /**
+ * Returns an iterator over the IDs of all items in the model.
+ *
+ * @return a {@link LongPrimitiveIterator} of all item IDs in the model, in order
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ LongPrimitiveIterator getItemIDs() throws TasteException;
+
+ /**
+ * Returns all preferences that have been expressed for the given item.
+ *
+ * @param itemID
+ * item ID
+ * @return all existing {@link Preference}s expressed for that item, ordered by user ID, as an array
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+ * if the item does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ PreferenceArray getPreferencesForItem(long itemID) throws TasteException;
+
+ /**
+ * Retrieves the preference value for a single user and item.
+ *
+ * @param userID
+ * user ID to get pref value from
+ * @param itemID
+ * item ID to get pref value for
+ * @return preference value from the given user for the given item or null if none exists
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if the user does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ Float getPreferenceValue(long userID, long itemID) throws TasteException;
+
+ /**
+ * Retrieves the time at which a preference value from a user and item was set, if known.
+ * Time is expressed in the usual way, as a number of milliseconds since the epoch.
+ *
+ * @param userID user ID for preference in question
+ * @param itemID item ID for preference in question
+ * @return time at which preference was set or null if no preference exists or its time is not known
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if the user does not exist
+ * @throws TasteException if an error occurs while accessing the data
+ */
+ Long getPreferenceTime(long userID, long itemID) throws TasteException;
+
+ /**
+ * Returns the number of items in the model.
+ *
+ * @return total number of items known to the model. This is generally the union of all items preferred by
+ * at least one user but could include more.
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ int getNumItems() throws TasteException;
+
+ /**
+ * Returns the number of users in the model.
+ *
+ * @return total number of users known to the model.
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ int getNumUsers() throws TasteException;
+
+ /**
+ * Counts the users that have expressed a preference for a single item.
+ *
+ * @param itemID item ID to check for
+ * @return the number of users who have expressed a preference for the item
+ * @throws TasteException if an error occurs while accessing the data
+ */
+ int getNumUsersWithPreferenceFor(long itemID) throws TasteException;
+
+ /**
+ * Counts the users that have expressed a preference for a pair of items.
+ *
+ * @param itemID1 first item ID to check for
+ * @param itemID2 second item ID to check for
+ * @return the number of users who have expressed a preference for the items
+ * @throws TasteException if an error occurs while accessing the data
+ */
+ int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException;
+
+ /**
+ * <p>
+ * Sets a particular preference (item plus rating) for a user.
+ * </p>
+ *
+ * @param userID
+ * user to set preference for
+ * @param itemID
+ * item to set preference for
+ * @param value
+ * preference value
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+ * if the item does not exist
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if the user does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ void setPreference(long userID, long itemID, float value) throws TasteException;
+
+ /**
+ * <p>
+ * Removes a particular preference for a user.
+ * </p>
+ *
+ * @param userID
+ * user from which to remove preference
+ * @param itemID
+ * item to remove preference for
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+ * if the item does not exist
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if the user does not exist
+ * @throws TasteException
+ * if an error occurs while accessing the data
+ */
+ void removePreference(long userID, long itemID) throws TasteException;
+
+ /**
+ * Tells whether this model distinguishes preference strengths.
+ *
+ * @return true if this implementation actually stores and returns distinct preference values;
+ * that is, if it is not a 'boolean' DataModel
+ */
+ boolean hasPreferenceValues();
+
+ /**
+ * Returns the upper bound of the preference scale.
+ *
+ * @return the maximum preference value that is possible in the current problem domain being evaluated. For
+ * example, if the domain is movie ratings on a scale of 1 to 5, this should be 5. While a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender} may estimate a preference value above 5.0, it
+ * isn't "fair" to consider that the system is actually suggesting an impossible rating of, say, 5.4 stars.
+ * In practice the application would cap this estimate to 5.0. Since evaluators evaluate
+ * the difference between estimated and actual value, this at least prevents this effect from unfairly
+ * penalizing a {@link org.apache.mahout.cf.taste.recommender.Recommender}
+ */
+ float getMaxPreference();
+
+ /**
+ * Returns the lower bound of the preference scale; the counterpart of {@link #getMaxPreference()}.
+ *
+ * @return the minimum preference value that is possible in the current problem domain being evaluated
+ * @see #getMaxPreference()
+ */
+ float getMinPreference();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
new file mode 100644
index 0000000..cc477fe
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>
+ * Mahout 0.2 changed the framework to operate only in terms of numeric (long) ID values for users and items.
+ * This is, obviously, not compatible with applications that used other key types -- most commonly
+ * {@link String}. Implementations of this interface provide support for mapping Strings to longs and vice
+ * versa in order to provide a smoother migration path to applications that must still use strings as IDs.
+ * </p>
+ *
+ * <p>
+ * The mapping from strings to 64-bit numeric values is fixed here, to provide a standard implementation that
+ * is 'portable' or reproducible outside the framework easily. See {@link #toLongID(String)}.
+ * </p>
+ *
+ * <p>
+ * Because this mapping is deterministically computable, it does not need to be stored. Indeed, the
+ * implementations' job is to store the reverse mapping. There are an infinite number of strings but only a
+ * fixed number of longs, so it is possible for two strings to map to the same value. Implementations do not
+ * treat this as an error but rather retain only the most recent mapping, overwriting a previous mapping. The
+ * probability of collision in a 64-bit space is quite small, but not zero. However, in the context of a
+ * collaborative filtering problem, the consequence of a collision is small, at worst -- perhaps one user
+ * receives another user's recommendations.
+ * </p>
+ *
+ * @since 0.2
+ */
+public interface IDMigrator extends Refreshable {
+
+ /**
+ * Maps a string ID to its numeric ID.
+ *
+ * @param stringID
+ * string ID to convert
+ * @return the top 8 bytes of the MD5 hash of the bytes of the given {@link String}'s UTF-8 encoding as a
+ * long.
+ */
+ long toLongID(String stringID);
+
+ /**
+ * Looks up the string ID that a numeric ID was derived from.
+ *
+ * @param longID
+ * numeric ID to look up
+ * @return the string ID most recently associated with the given long ID, or null if none exists
+ * @throws TasteException
+ * if an error occurs while retrieving the mapping
+ */
+ String toStringID(long longID) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
new file mode 100644
index 0000000..e91ed48
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import javax.sql.DataSource;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+
+/**
+ * A {@link DataModel} that exposes its underlying {@link DataSource} and can export its contents as
+ * in-memory maps.
+ */
+public interface JDBCDataModel extends DataModel {
+
+ /**
+ * @return {@link DataSource} underlying this model
+ */
+ DataSource getDataSource();
+
+ /**
+ * Hmm, should this exist elsewhere? seems like most relevant for a DB implementation, which is not in
+ * memory, which might want to export to memory.
+ *
+ * @return all user preference data
+ * @throws TasteException
+ * NOTE(review): presumably thrown when the underlying data cannot be read -- confirm with implementations
+ */
+ FastByIDMap<PreferenceArray> exportWithPrefs() throws TasteException;
+
+ /**
+ * Variant of {@link #exportWithPrefs()} that returns {@link FastIDSet}s instead of
+ * {@link PreferenceArray}s.
+ *
+ * @return NOTE(review): presumably the item IDs per user, without preference values -- confirm with
+ * implementations
+ * @throws TasteException
+ * NOTE(review): presumably thrown when the underlying data cannot be read -- confirm with implementations
+ */
+ FastByIDMap<FastIDSet> exportWithIDsOnly() throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
new file mode 100644
index 0000000..fe0150a
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+/**
+ * <p>
+ * A {@link Preference} encapsulates an item and a preference value, which indicates the strength of the
+ * preference for it. {@link Preference}s are associated to users.
+ * </p>
+ */
+public interface Preference {
+
+ /** @return ID of user who prefers the item */
+ long getUserID();
+
+ /** @return item ID that is preferred */
+ long getItemID();
+
+ /**
+ * Returns the preference value.
+ *
+ * @return strength of the preference for that item. Zero should indicate "no preference either way";
+ * positive values indicate preference and negative values indicate dislike
+ */
+ float getValue();
+
+ /**
+ * Sets the strength of the preference for this item.
+ *
+ * @param value
+ * new preference value
+ */
+ void setValue(float value);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
new file mode 100644
index 0000000..3886bc6
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import java.io.Serializable;
+
+/**
+ * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a
+ * more memory-efficient representation.
+ */
+public interface PreferenceArray extends Cloneable, Serializable, Iterable<Preference> {
+
+ /**
+ * @return length of the "array", that is, the number of preferences it holds
+ */
+ int length();
+
+ /**
+ * @param i
+ * index
+ * @return a materialized {@link Preference} representation of the preference at i
+ */
+ Preference get(int i);
+
+ /**
+ * Sets preference at i from information in the given {@link Preference}
+ *
+ * @param i
+ * index
+ * @param pref
+ * preference to take the information from
+ */
+ void set(int i, Preference pref);
+
+ /**
+ * @param i
+ * index
+ * @return user ID from preference at i
+ */
+ long getUserID(int i);
+
+ /**
+ * Sets user ID for preference at i.
+ *
+ * @param i
+ * index
+ * @param userID
+ * new user ID
+ */
+ void setUserID(int i, long userID);
+
+ /**
+ * @param i
+ * index
+ * @return item ID from preference at i
+ */
+ long getItemID(int i);
+
+ /**
+ * Sets item ID for preference at i.
+ *
+ * @param i
+ * index
+ * @param itemID
+ * new item ID
+ */
+ void setItemID(int i, long itemID);
+
+ /**
+ * @return all user or item IDs
+ */
+ long[] getIDs();
+
+ /**
+ * @param i
+ * index
+ * @return preference value from preference at i
+ */
+ float getValue(int i);
+
+ /**
+ * Sets preference value for preference at i.
+ *
+ * @param i
+ * index
+ * @param value
+ * new preference value
+ */
+ void setValue(int i, float value);
+
+ /**
+ * @return independent copy of this object
+ */
+ PreferenceArray clone();
+
+ /**
+ * Sorts underlying array by user ID, ascending.
+ */
+ void sortByUser();
+
+ /**
+ * Sorts underlying array by item ID, ascending.
+ */
+ void sortByItem();
+
+ /**
+ * Sorts underlying array by preference value, ascending.
+ */
+ void sortByValue();
+
+ /**
+ * Sorts underlying array by preference value, descending.
+ */
+ void sortByValueReversed();
+
+ /**
+ * @param userID
+ * user ID
+ * @return true if array contains a preference with given user ID
+ */
+ boolean hasPrefWithUserID(long userID);
+
+ /**
+ * @param itemID
+ * item ID
+ * @return true if array contains a preference with given item ID
+ */
+ boolean hasPrefWithItemID(long itemID);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
new file mode 100644
index 0000000..ff29a34
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * An {@link IDMigrator} whose reverse long-to-String mappings can be stored explicitly, either one at a time
+ * or in bulk.
+ */
+public interface UpdatableIDMigrator extends IDMigrator {
+
+ /**
+ * Stores the reverse long-to-String mapping in some kind of backing store. Note that this must be called
+ * directly (or indirectly through {@link #initialize(Iterable)}) for every String that might be encountered
+ * in the application, or else the mapping will not be known.
+ *
+ * @param longID
+ * long ID
+ * @param stringID
+ * string ID that maps to/from that long ID
+ * @throws TasteException
+ * if an error occurs while saving the mapping
+ */
+ void storeMapping(long longID, String stringID) throws TasteException;
+
+ /**
+ * Make the mapping aware of the given string IDs. This must be called initially before the implementation
+ * is used, or else it will not be aware of reverse long-to-String mappings.
+ *
+ * @param stringIDs
+ * string IDs to make the mapping aware of
+ * @throws TasteException
+ * if an error occurs while storing the mappings
+ */
+ void initialize(Iterable<String> stringIDs) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
new file mode 100644
index 0000000..2a143e1
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.neighborhood;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>
+ * Implementations of this interface compute a "neighborhood" of users like a given user. This neighborhood
+ * can then be used to compute recommendations.
+ * </p>
+ */
+public interface UserNeighborhood extends Refreshable {
+
+ /**
+ * Computes the neighborhood of the given user.
+ *
+ * @param userID
+ * ID of user for which a neighborhood will be computed
+ * @return IDs of users in the neighborhood
+ * @throws TasteException
+ * if an error occurs while accessing data
+ */
+ long[] getUserNeighborhood(long userID) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
new file mode 100644
index 0000000..ada1949
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * Strategy used to retrieve all items that could possibly be recommended to a given user.
+ */
+public interface CandidateItemsStrategy extends Refreshable {
+
+ /**
+ * @return IDs of all items that could be recommended to the user identified by {@code userID}
+ */
+ FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel,
+ boolean includeKnownItems) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
new file mode 100644
index 0000000..d9a9cf7
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+/**
+ * <p>
+ * A counterpart to {@link Rescorer} which operates on {@code long} primitive IDs, rather than arbitrary
+ * {@link Object}s. This is provided since most uses of rescoring in the framework take IDs (as {@code long})
+ * as an argument, and so this can be used to avoid unnecessary boxing/unboxing.
+ * </p>
+ */
+public interface IDRescorer {
+
+ /**
+ * @param id
+ * ID of thing (user, item, etc.) to rescore
+ * @param originalScore
+ * original score
+ * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely
+ */
+ double rescore(long id, double originalScore);
+
+ /**
+ * Returns {@code true} to exclude the given thing.
+ *
+ * @param id
+ * ID of thing (user, item, etc.) to check for exclusion
+ * @return {@code true} to exclude, {@code false} otherwise
+ */
+ boolean isFiltered(long id);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
new file mode 100644
index 0000000..570f851
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.common.LongPair;
+
+/**
+ * <p>
+ * Interface implemented by "item-based" recommenders.
+ * </p>
+ */
+public interface ItemBasedRecommender extends Recommender {
+
+ /**
+ * @param itemID
+ * ID of item for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @return items most similar to the given item, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException;
+
+ /**
+ * @param itemID
+ * ID of item for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @param rescorer
+ * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+ * items
+ * @return items most similar to the given item, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long itemID, int howMany, Rescorer<LongPair> rescorer) throws TasteException;
+
+ /**
+ * @param itemIDs
+ * IDs of items for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @return items most similar to the given items, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException;
+
+ /**
+ * @param itemIDs
+ * IDs of items for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @param rescorer
+ * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+ * items
+ * @return items most similar to the given items, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+ int howMany,
+ Rescorer<LongPair> rescorer) throws TasteException;
+
+ /**
+ * @param itemIDs
+ * IDs of items for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @param excludeItemIfNotSimilarToAll
+ * exclude an item if it is not similar to each of the input items
+ * @return items most similar to the given items, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+ int howMany,
+ boolean excludeItemIfNotSimilarToAll) throws TasteException;
+
+ /**
+ * @param itemIDs
+ * IDs of items for which to find most similar other items
+ * @param howMany
+ * desired number of most similar items to find
+ * @param rescorer
+ * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+ * items
+ * @param excludeItemIfNotSimilarToAll
+ * exclude an item if it is not similar to each of the input items
+ * @return items most similar to the given items, ordered from most similar to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> mostSimilarItems(long[] itemIDs,
+ int howMany,
+ Rescorer<LongPair> rescorer,
+ boolean excludeItemIfNotSimilarToAll) throws TasteException;
+
+ /**
+ * <p>
+ * Lists the items that were most influential in recommending a given item to a given user. Exactly how this
+ * is determined is left to the implementation, but, generally this will return items that the user prefers
+ * and that are similar to the given item.
+ * </p>
+ *
+ * <p>
+ * This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning
+ * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it
+ * encapsulates an item and value. The value here does not necessarily have a consistent interpretation or
+ * expected range; it will be higher the more influential the item was in the recommendation.
+ * </p>
+ *
+ * @param userID
+ * ID of user who was recommended the item
+ * @param itemID
+ * ID of item that was recommended
+ * @param howMany
+ * maximum number of items to return
+ * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given
+ * item to least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
+ */
+ List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
new file mode 100644
index 0000000..282ceff
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Strategy used to retrieve all items that could possibly be similar to a given set of items.
+ */
+public interface MostSimilarItemsCandidateItemsStrategy extends Refreshable {
+
+ FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws TasteException;
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
new file mode 100644
index 0000000..1fcece8
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+/**
+ * <p>
+ * Implementations encapsulate items that are recommended, and include the ID of the item recommended and a
+ * value expressing the strength of the preference.
+ * </p>
+ */
+public interface RecommendedItem {
+
+ /** @return the recommended item ID */
+ long getItemID();
+
+ /**
+ * <p>
+ * A value expressing the strength of the preference for the recommended item. The range of the values
+ * depends on the implementation. Implementations must use larger values to express stronger preference.
+ * </p>
+ *
+ * @return strength of the preference
+ */
+ float getValue();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
new file mode 100644
index 0000000..4135aff
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>
+ * Implementations of this interface can recommend items for a user. Implementations will likely take
+ * advantage of several classes in other packages here to compute this.
+ * </p>
+ */
+public interface Recommender extends Refreshable {
+
+ /**
+ * @param userID
+ * user for which recommendations are to be computed
+ * @param howMany
+ * desired number of recommendations
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+ * least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ List<RecommendedItem> recommend(long userID, int howMany) throws TasteException;
+
+ /**
+ * @param userID
+ * user for which recommendations are to be computed
+ * @param howMany
+ * desired number of recommendations
+ * @param includeKnownItems
+ * whether to include items already known by the user in recommendations
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+ * least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException;
+
+ /**
+ * @param userID
+ * user for which recommendations are to be computed
+ * @param howMany
+ * desired number of recommendations
+ * @param rescorer
+ * rescoring function to apply before final list of recommendations is determined
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+ * least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException;
+
+ /**
+ * @param userID
+ * user for which recommendations are to be computed
+ * @param howMany
+ * desired number of recommendations
+ * @param rescorer
+ * rescoring function to apply before final list of recommendations is determined
+ * @param includeKnownItems
+ * whether to include items already known by the user in recommendations
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to
+ * least
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+
+ List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems)
+ throws TasteException;
+
+ /**
+ * @param userID
+ * user ID whose preference is to be estimated
+ * @param itemID
+ * item ID to estimate preference for
+ * @return an estimated preference if the user has not expressed a preference for the item, or else the
+ * user's actual preference for the item. If a preference cannot be estimated, returns
+ * {@link Float#NaN}
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ float estimatePreference(long userID, long itemID) throws TasteException;
+
+ /**
+ * @param userID
+ * user to set preference for
+ * @param itemID
+ * item to set preference for
+ * @param value
+ * preference value
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ void setPreference(long userID, long itemID, float value) throws TasteException;
+
+ /**
+ * @param userID
+ * user from which to remove preference
+ * @param itemID
+ * item for which to remove preference
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ void removePreference(long userID, long itemID) throws TasteException;
+
+ /**
+ * @return underlying {@link DataModel} used by this {@link Recommender} implementation
+ */
+ DataModel getDataModel();
+
+}