You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/28 14:54:50 UTC
[22/51] [partial] mahout git commit: NO-JIRA Clean up MR refactor
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
new file mode 100644
index 0000000..e267a39
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.eval;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Evaluate recommender by comparing order of all raw prefs with order in
+ * recommender's output for that user. Can also compare data models.
+ */
+public final class OrderBasedRecommenderEvaluator {
+
+ private static final Logger log = LoggerFactory.getLogger(OrderBasedRecommenderEvaluator.class);
+
+ private OrderBasedRecommenderEvaluator() {
+ }
+
+ public static void evaluate(Recommender recommender1,
+ Recommender recommender2,
+ int samples,
+ RunningAverage tracker,
+ String tag) throws TasteException {
+ printHeader();
+ LongPrimitiveIterator users = recommender1.getDataModel().getUserIDs();
+
+ while (users.hasNext()) {
+ long userID = users.nextLong();
+ List<RecommendedItem> recs1 = recommender1.recommend(userID, samples);
+ List<RecommendedItem> recs2 = recommender2.recommend(userID, samples);
+ FastIDSet commonSet = new FastIDSet();
+ long maxItemID = setBits(commonSet, recs1, samples);
+ FastIDSet otherSet = new FastIDSet();
+ maxItemID = Math.max(maxItemID, setBits(otherSet, recs2, samples));
+ int max = mask(commonSet, otherSet, maxItemID);
+ max = Math.min(max, samples);
+ if (max < 2) {
+ continue;
+ }
+ Long[] items1 = getCommonItems(commonSet, recs1, max);
+ Long[] items2 = getCommonItems(commonSet, recs2, max);
+ double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
+ tracker.addDatum(variance);
+ }
+ }
+
+ public static void evaluate(Recommender recommender,
+ DataModel model,
+ int samples,
+ RunningAverage tracker,
+ String tag) throws TasteException {
+ printHeader();
+ LongPrimitiveIterator users = recommender.getDataModel().getUserIDs();
+ while (users.hasNext()) {
+ long userID = users.nextLong();
+ List<RecommendedItem> recs1 = recommender.recommend(userID, model.getNumItems());
+ PreferenceArray prefs2 = model.getPreferencesFromUser(userID);
+ prefs2.sortByValueReversed();
+ FastIDSet commonSet = new FastIDSet();
+ long maxItemID = setBits(commonSet, recs1, samples);
+ FastIDSet otherSet = new FastIDSet();
+ maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples));
+ int max = mask(commonSet, otherSet, maxItemID);
+ max = Math.min(max, samples);
+ if (max < 2) {
+ continue;
+ }
+ Long[] items1 = getCommonItems(commonSet, recs1, max);
+ Long[] items2 = getCommonItems(commonSet, prefs2, max);
+ double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
+ tracker.addDatum(variance);
+ }
+ }
+
+ public static void evaluate(DataModel model1,
+ DataModel model2,
+ int samples,
+ RunningAverage tracker,
+ String tag) throws TasteException {
+ printHeader();
+ LongPrimitiveIterator users = model1.getUserIDs();
+ while (users.hasNext()) {
+ long userID = users.nextLong();
+ PreferenceArray prefs1 = model1.getPreferencesFromUser(userID);
+ PreferenceArray prefs2 = model2.getPreferencesFromUser(userID);
+ prefs1.sortByValueReversed();
+ prefs2.sortByValueReversed();
+ FastIDSet commonSet = new FastIDSet();
+ long maxItemID = setBits(commonSet, prefs1, samples);
+ FastIDSet otherSet = new FastIDSet();
+ maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples));
+ int max = mask(commonSet, otherSet, maxItemID);
+ max = Math.min(max, samples);
+ if (max < 2) {
+ continue;
+ }
+ Long[] items1 = getCommonItems(commonSet, prefs1, max);
+ Long[] items2 = getCommonItems(commonSet, prefs2, max);
+ double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
+ tracker.addDatum(variance);
+ }
+ }
+
+ /**
+ * This exists because FastIDSet has 'retainAll' as MASK, but there is
+ * no count of the number of items in the set. size() is supposed to do
+ * this but does not work.
+ */
+ private static int mask(FastIDSet commonSet, FastIDSet otherSet, long maxItemID) {
+ int count = 0;
+ for (int i = 0; i <= maxItemID; i++) {
+ if (commonSet.contains(i)) {
+ if (otherSet.contains(i)) {
+ count++;
+ } else {
+ commonSet.remove(i);
+ }
+ }
+ }
+ return count;
+ }
+
+ private static Long[] getCommonItems(FastIDSet commonSet, Iterable<RecommendedItem> recs, int max) {
+ Long[] commonItems = new Long[max];
+ int index = 0;
+ for (RecommendedItem rec : recs) {
+ Long item = rec.getItemID();
+ if (commonSet.contains(item)) {
+ commonItems[index++] = item;
+ }
+ if (index == max) {
+ break;
+ }
+ }
+ return commonItems;
+ }
+
+ private static Long[] getCommonItems(FastIDSet commonSet, PreferenceArray prefs1, int max) {
+ Long[] commonItems = new Long[max];
+ int index = 0;
+ for (int i = 0; i < prefs1.length(); i++) {
+ Long item = prefs1.getItemID(i);
+ if (commonSet.contains(item)) {
+ commonItems[index++] = item;
+ }
+ if (index == max) {
+ break;
+ }
+ }
+ return commonItems;
+ }
+
+ private static long setBits(FastIDSet modelSet, List<RecommendedItem> items, int max) {
+ long maxItem = -1;
+ for (int i = 0; i < items.size() && i < max; i++) {
+ long itemID = items.get(i).getItemID();
+ modelSet.add(itemID);
+ if (itemID > maxItem) {
+ maxItem = itemID;
+ }
+ }
+ return maxItem;
+ }
+
+ private static long setBits(FastIDSet modelSet, PreferenceArray prefs, int max) {
+ long maxItem = -1;
+ for (int i = 0; i < prefs.length() && i < max; i++) {
+ long itemID = prefs.getItemID(i);
+ modelSet.add(itemID);
+ if (itemID > maxItem) {
+ maxItem = itemID;
+ }
+ }
+ return maxItem;
+ }
+
+ private static void printHeader() {
+ log.info("tag,user,samples,common,hamming,bubble,rank,normal,score");
+ }
+
+ /**
+ * Common Subset Scoring
+ *
+ * These measurements are given the set of results that are common to both
+ * recommendation lists. They only get ordered lists.
+ *
+ * These measures all return raw numbers do not correlate among the tests.
+ * The numbers are not corrected against the total number of samples or the
+ * number of common items.
+ * The one contract is that all measures are 0 for an exact match and an
+ * increasing positive number as differences increase.
+ */
+ private static double scoreCommonSubset(String tag,
+ long userID,
+ int samples,
+ int subset,
+ Long[] itemsL,
+ Long[] itemsR) {
+ int[] vectorZ = new int[subset];
+ int[] vectorZabs = new int[subset];
+
+ long bubble = sort(itemsL, itemsR);
+ int hamming = slidingWindowHamming(itemsR, itemsL);
+ if (hamming > samples) {
+ throw new IllegalStateException();
+ }
+ getVectorZ(itemsR, itemsL, vectorZ, vectorZabs);
+ double normalW = normalWilcoxon(vectorZ, vectorZabs);
+ double meanRank = getMeanRank(vectorZabs);
+ // case statement for requested value
+ double variance = Math.sqrt(meanRank);
+ log.info("{},{},{},{},{},{},{},{},{}",
+ tag, userID, samples, subset, hamming, bubble, meanRank, normalW, variance);
+ return variance;
+ }
+
+ // simple sliding-window hamming distance: a[i or plus/minus 1] == b[i]
+ private static int slidingWindowHamming(Long[] itemsR, Long[] itemsL) {
+ int count = 0;
+ int samples = itemsR.length;
+
+ if (itemsR[0].equals(itemsL[0]) || itemsR[0].equals(itemsL[1])) {
+ count++;
+ }
+ for (int i = 1; i < samples - 1; i++) {
+ long itemID = itemsL[i];
+ if (itemsR[i] == itemID || itemsR[i - 1] == itemID || itemsR[i + 1] == itemID) {
+ count++;
+ }
+ }
+ if (itemsR[samples - 1].equals(itemsL[samples - 1]) || itemsR[samples - 1].equals(itemsL[samples - 2])) {
+ count++;
+ }
+ return count;
+ }
+
+ /**
+ * Normal-distribution probability value for matched sets of values.
+ * Based upon:
+ * http://comp9.psych.cornell.edu/Darlington/normscor.htm
+ *
+ * The Standard Wilcoxon is not used because it requires a lookup table.
+ */
+ static double normalWilcoxon(int[] vectorZ, int[] vectorZabs) {
+ int nitems = vectorZ.length;
+
+ double[] ranks = new double[nitems];
+ double[] ranksAbs = new double[nitems];
+ wilcoxonRanks(vectorZ, vectorZabs, ranks, ranksAbs);
+ return Math.min(getMeanWplus(ranks), getMeanWminus(ranks));
+ }
+
+ /**
+ * vector Z is a list of distances between the correct value and the recommended value
+ * Z[i] = position i of correct itemID - position of correct itemID in recommendation list
+ * can be positive or negative
+ * the smaller the better - means recommendations are closer
+ * both are the same length, and both sample from the same set
+ *
+ * destructive to items arrays - allows N log N instead of N^2 order
+ */
+ private static void getVectorZ(Long[] itemsR, Long[] itemsL, int[] vectorZ, int[] vectorZabs) {
+ int nitems = itemsR.length;
+ int bottom = 0;
+ int top = nitems - 1;
+ for (int i = 0; i < nitems; i++) {
+ long itemID = itemsR[i];
+ for (int j = bottom; j <= top; j++) {
+ if (itemsL[j] == null) {
+ continue;
+ }
+ long test = itemsL[j];
+ if (itemID == test) {
+ vectorZ[i] = i - j;
+ vectorZabs[i] = Math.abs(i - j);
+ if (j == bottom) {
+ bottom++;
+ } else if (j == top) {
+ top--;
+ } else {
+ itemsL[j] = null;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ /**
+ * Ranks are the position of the value from low to high, divided by the # of values.
+ * I had to walk through it a few times.
+ */
+ private static void wilcoxonRanks(int[] vectorZ, int[] vectorZabs, double[] ranks, double[] ranksAbs) {
+ int nitems = vectorZ.length;
+ int[] sorted = vectorZabs.clone();
+ Arrays.sort(sorted);
+ int zeros = 0;
+ for (; zeros < nitems; zeros++) {
+ if (sorted[zeros] > 0) {
+ break;
+ }
+ }
+ for (int i = 0; i < nitems; i++) {
+ double rank = 0.0;
+ int count = 0;
+ int score = vectorZabs[i];
+ for (int j = 0; j < nitems; j++) {
+ if (score == sorted[j]) {
+ rank += j + 1 - zeros;
+ count++;
+ } else if (score < sorted[j]) {
+ break;
+ }
+ }
+ if (vectorZ[i] != 0) {
+ ranks[i] = (rank / count) * (vectorZ[i] < 0 ? -1 : 1); // better be at least 1
+ ranksAbs[i] = Math.abs(ranks[i]);
+ }
+ }
+ }
+
+ private static double getMeanRank(int[] ranks) {
+ int nitems = ranks.length;
+ double sum = 0.0;
+ for (int rank : ranks) {
+ sum += rank;
+ }
+ return sum / nitems;
+ }
+
+ private static double getMeanWplus(double[] ranks) {
+ int nitems = ranks.length;
+ double sum = 0.0;
+ for (double rank : ranks) {
+ if (rank > 0) {
+ sum += rank;
+ }
+ }
+ return sum / nitems;
+ }
+
+ private static double getMeanWminus(double[] ranks) {
+ int nitems = ranks.length;
+ double sum = 0.0;
+ for (double rank : ranks) {
+ if (rank < 0) {
+ sum -= rank;
+ }
+ }
+ return sum / nitems;
+ }
+
+ /**
+ * Do bubble sort and return number of swaps needed to match preference lists.
+ * Sort itemsR using itemsL as the reference order.
+ */
+ static long sort(Long[] itemsL, Long[] itemsR) {
+ int length = itemsL.length;
+ if (length < 2) {
+ return 0;
+ }
+ if (length == 2) {
+ return itemsL[0].longValue() == itemsR[0].longValue() ? 0 : 1;
+ }
+ // 1) avoid changing originals; 2) primitive type is more efficient
+ long[] reference = new long[length];
+ long[] sortable = new long[length];
+ for (int i = 0; i < length; i++) {
+ reference[i] = itemsL[i];
+ sortable[i] = itemsR[i];
+ }
+ int sorted = 0;
+ long swaps = 0;
+ while (sorted < length - 1) {
+ // opportunistically trim back the top
+ while (length > 0 && reference[length - 1] == sortable[length - 1]) {
+ length--;
+ }
+ if (length == 0) {
+ break;
+ }
+ if (reference[sorted] == sortable[sorted]) {
+ sorted++;
+ } else {
+ for (int j = sorted; j < length - 1; j++) {
+ // do not swap anything already in place
+ int jump = 1;
+ if (reference[j] == sortable[j]) {
+ while (j + jump < length && reference[j + jump] == sortable[j + jump]) {
+ jump++;
+ }
+ }
+ if (j + jump < length && !(reference[j] == sortable[j] && reference[j + jump] == sortable[j + jump])) {
+ long tmp = sortable[j];
+ sortable[j] = sortable[j + 1];
+ sortable[j + 1] = tmp;
+ swaps++;
+ }
+ }
+ }
+ }
+ return swaps;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
new file mode 100644
index 0000000..97eda10
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.eval;
+
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.Preference;
+
+/**
+ * <p>
+ * A {@link org.apache.mahout.cf.taste.eval.RecommenderEvaluator} which computes the "root mean squared"
+ * difference between predicted and actual ratings for users. This is the square root of the average of this
+ * difference, squared.
+ * </p>
+ */
+public final class RMSRecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator {
+
+ private RunningAverage average;
+
+ @Override
+ protected void reset() {
+ average = new FullRunningAverage();
+ }
+
+ @Override
+ protected void processOneEstimate(float estimatedPreference, Preference realPref) {
+ double diff = realPref.getValue() - estimatedPreference;
+ average.addDatum(diff * diff);
+ }
+
+ @Override
+ protected double computeFinalEvaluation() {
+ return Math.sqrt(average.getAverage());
+ }
+
+ @Override
+ public String toString() {
+ return "RMSRecommenderEvaluator";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java
new file mode 100644
index 0000000..036d0b4
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.eval;
+
+import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
+
+final class StatsCallable implements Callable<Void> {
+
+ private static final Logger log = LoggerFactory.getLogger(StatsCallable.class);
+
+ private final Callable<Void> delegate;
+ private final boolean logStats;
+ private final RunningAverageAndStdDev timing;
+ private final AtomicInteger noEstimateCounter;
+
+ StatsCallable(Callable<Void> delegate,
+ boolean logStats,
+ RunningAverageAndStdDev timing,
+ AtomicInteger noEstimateCounter) {
+ this.delegate = delegate;
+ this.logStats = logStats;
+ this.timing = timing;
+ this.noEstimateCounter = noEstimateCounter;
+ }
+
+ @Override
+ public Void call() throws Exception {
+ long start = System.currentTimeMillis();
+ delegate.call();
+ long end = System.currentTimeMillis();
+ timing.addDatum(end - start);
+ if (logStats) {
+ Runtime runtime = Runtime.getRuntime();
+ int average = (int) timing.getAverage();
+ log.info("Average time per recommendation: {}ms", average);
+ long totalMemory = runtime.totalMemory();
+ long memory = totalMemory - runtime.freeMemory();
+ log.info("Approximate memory used: {}MB / {}MB", memory / 1000000L, totalMemory / 1000000L);
+ log.info("Unable to recommend in {} cases", noEstimateCounter.get());
+ }
+ return null;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java
new file mode 100644
index 0000000..a1a2a1f
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Contains some features common to all implementations.
+ */
+public abstract class AbstractDataModel implements DataModel {
+
+ private float maxPreference;
+ private float minPreference;
+
+ protected AbstractDataModel() {
+ maxPreference = Float.NaN;
+ minPreference = Float.NaN;
+ }
+
+ @Override
+ public float getMaxPreference() {
+ return maxPreference;
+ }
+
+ protected void setMaxPreference(float maxPreference) {
+ this.maxPreference = maxPreference;
+ }
+
+ @Override
+ public float getMinPreference() {
+ return minPreference;
+ }
+
+ protected void setMinPreference(float minPreference) {
+ this.minPreference = minPreference;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
new file mode 100644
index 0000000..6efa6fa
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Collection;
+
+import org.apache.commons.io.Charsets;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.model.IDMigrator;
+
+public abstract class AbstractIDMigrator implements IDMigrator {
+
+ private final MessageDigest md5Digest;
+
+ protected AbstractIDMigrator() {
+ try {
+ md5Digest = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException nsae) {
+ // Can't happen
+ throw new IllegalStateException(nsae);
+ }
+ }
+
+ /**
+ * @return most significant 8 bytes of the MD5 hash of the string, as a long
+ */
+ protected final long hash(String value) {
+ byte[] md5hash;
+ synchronized (md5Digest) {
+ md5hash = md5Digest.digest(value.getBytes(Charsets.UTF_8));
+ md5Digest.reset();
+ }
+ long hash = 0L;
+ for (int i = 0; i < 8; i++) {
+ hash = hash << 8 | md5hash[i] & 0x00000000000000FFL;
+ }
+ return hash;
+ }
+
+ @Override
+ public long toLongID(String stringID) {
+ return hash(stringID);
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
new file mode 100644
index 0000000..cd3a434
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import javax.sql.DataSource;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.UpdatableIDMigrator;
+import org.apache.mahout.common.IOUtils;
+
+/**
+ * Implementation which stores the reverse long-to-String mapping in a database. Subclasses can override and
+ * configure the class to operate with particular databases by supplying appropriate SQL statements to the
+ * constructor.
+ */
+public abstract class AbstractJDBCIDMigrator extends AbstractIDMigrator implements UpdatableIDMigrator {
+
+ public static final String DEFAULT_MAPPING_TABLE = "taste_id_mapping";
+ public static final String DEFAULT_LONG_ID_COLUMN = "long_id";
+ public static final String DEFAULT_STRING_ID_COLUMN = "string_id";
+
+ private final DataSource dataSource;
+ private final String getStringIDSQL;
+ private final String storeMappingSQL;
+
+ /**
+ * @param getStringIDSQL
+ * SQL statement which selects one column, the String ID, from a mapping table. The statement
+ * should take one long parameter.
+ * @param storeMappingSQL
+ * SQL statement which saves a mapping from long to String. It should take two parameters, a long
+ * and a String.
+ */
+ protected AbstractJDBCIDMigrator(DataSource dataSource, String getStringIDSQL, String storeMappingSQL) {
+ this.dataSource = dataSource;
+ this.getStringIDSQL = getStringIDSQL;
+ this.storeMappingSQL = storeMappingSQL;
+ }
+
+ @Override
+ public final void storeMapping(long longID, String stringID) throws TasteException {
+ Connection conn = null;
+ PreparedStatement stmt = null;
+ try {
+ conn = dataSource.getConnection();
+ stmt = conn.prepareStatement(storeMappingSQL);
+ stmt.setLong(1, longID);
+ stmt.setString(2, stringID);
+ stmt.executeUpdate();
+ } catch (SQLException sqle) {
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.quietClose(null, stmt, conn);
+ }
+ }
+
+ @Override
+ public final String toStringID(long longID) throws TasteException {
+ Connection conn = null;
+ PreparedStatement stmt = null;
+ ResultSet rs = null;
+ try {
+ conn = dataSource.getConnection();
+ stmt = conn.prepareStatement(getStringIDSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+ stmt.setFetchSize(1);
+ stmt.setLong(1, longID);
+ rs = stmt.executeQuery();
+ if (rs.next()) {
+ return rs.getString(1);
+ } else {
+ return null;
+ }
+ } catch (SQLException sqle) {
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.quietClose(rs, stmt, conn);
+ }
+ }
+
+ @Override
+ public void initialize(Iterable<String> stringIDs) throws TasteException {
+ for (String stringID : stringIDs) {
+ storeMapping(toLongID(stringID), stringID);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java
new file mode 100644
index 0000000..6db5807
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.CountingIterator;
+
+/**
+ * <p>
+ * Like {@link BooleanUserPreferenceArray} but stores preferences for one item (all item IDs the same) rather
+ * than one user.
+ * </p>
+ *
+ * @see BooleanPreference
+ * @see BooleanUserPreferenceArray
+ * @see GenericItemPreferenceArray
+ */
+public final class BooleanItemPreferenceArray implements PreferenceArray {
+
+ private final long[] ids;
+ private long id;
+
+ public BooleanItemPreferenceArray(int size) {
+ this.ids = new long[size];
+ this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
+ }
+
+ public BooleanItemPreferenceArray(List<? extends Preference> prefs, boolean forOneUser) {
+ this(prefs.size());
+ int size = prefs.size();
+ for (int i = 0; i < size; i++) {
+ Preference pref = prefs.get(i);
+ ids[i] = forOneUser ? pref.getItemID() : pref.getUserID();
+ }
+ if (size > 0) {
+ id = forOneUser ? prefs.get(0).getUserID() : prefs.get(0).getItemID();
+ }
+ }
+
+ /**
+ * This is a private copy constructor for clone().
+ */
+ private BooleanItemPreferenceArray(long[] ids, long id) {
+ this.ids = ids;
+ this.id = id;
+ }
+
+ @Override
+ public int length() {
+ return ids.length;
+ }
+
+ @Override
+ public Preference get(int i) {
+ return new PreferenceView(i);
+ }
+
+ @Override
+ public void set(int i, Preference pref) {
+ id = pref.getItemID();
+ ids[i] = pref.getUserID();
+ }
+
+ @Override
+ public long getUserID(int i) {
+ return ids[i];
+ }
+
+ @Override
+ public void setUserID(int i, long userID) {
+ ids[i] = userID;
+ }
+
+ @Override
+ public long getItemID(int i) {
+ return id;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Note that this method will actually set the item ID for <em>all</em> preferences.
+ */
+ @Override
+ public void setItemID(int i, long itemID) {
+ id = itemID;
+ }
+
+ /**
+ * @return all user IDs
+ */
+ @Override
+ public long[] getIDs() {
+ return ids;
+ }
+
+ @Override
+ public float getValue(int i) {
+ return 1.0f;
+ }
+
+ @Override
+ public void setValue(int i, float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void sortByUser() {
+ Arrays.sort(ids);
+ }
+
+ @Override
+ public void sortByItem() { }
+
+ @Override
+ public void sortByValue() { }
+
+ @Override
+ public void sortByValueReversed() { }
+
+ @Override
+ public boolean hasPrefWithUserID(long userID) {
+ for (long id : ids) {
+ if (userID == id) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean hasPrefWithItemID(long itemID) {
+ return id == itemID;
+ }
+
+ @Override
+ public BooleanItemPreferenceArray clone() {
+ return new BooleanItemPreferenceArray(ids.clone(), id);
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof BooleanItemPreferenceArray)) {
+ return false;
+ }
+ BooleanItemPreferenceArray otherArray = (BooleanItemPreferenceArray) other;
+ return id == otherArray.id && Arrays.equals(ids, otherArray.ids);
+ }
+
+ @Override
+ public Iterator<Preference> iterator() {
+ return Iterators.transform(new CountingIterator(length()),
+ new Function<Integer, Preference>() {
+ @Override
+ public Preference apply(Integer from) {
+ return new PreferenceView(from);
+ }
+ });
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder(10 * ids.length);
+ result.append("BooleanItemPreferenceArray[itemID:");
+ result.append(id);
+ result.append(",{");
+ for (int i = 0; i < ids.length; i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(ids[i]);
+ }
+ result.append("}]");
+ return result.toString();
+ }
+
+ private final class PreferenceView implements Preference {
+
+ private final int i;
+
+ private PreferenceView(int i) {
+ this.i = i;
+ }
+
+ @Override
+ public long getUserID() {
+ return BooleanItemPreferenceArray.this.getUserID(i);
+ }
+
+ @Override
+ public long getItemID() {
+ return BooleanItemPreferenceArray.this.getItemID(i);
+ }
+
+ @Override
+ public float getValue() {
+ return 1.0f;
+ }
+
+ @Override
+ public void setValue(float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java
new file mode 100644
index 0000000..2093af8
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.io.Serializable;
+
+import org.apache.mahout.cf.taste.model.Preference;
+
+/**
+ * Encapsulates a simple boolean "preference" for an item whose value does not matter (is fixed at 1.0). This
+ * is appropriate in situations where users conceptually have only a general "yes" preference for items,
+ * rather than a spectrum of preference values.
+ */
+public final class BooleanPreference implements Preference, Serializable {
+
+ private final long userID;
+ private final long itemID;
+
+ public BooleanPreference(long userID, long itemID) {
+ this.userID = userID;
+ this.itemID = itemID;
+ }
+
+ @Override
+ public long getUserID() {
+ return userID;
+ }
+
+ @Override
+ public long getItemID() {
+ return itemID;
+ }
+
+ @Override
+ public float getValue() {
+ return 1.0f;
+ }
+
+ @Override
+ public void setValue(float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String toString() {
+ return "BooleanPreference[userID: " + userID + ", itemID:" + itemID + ']';
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java
new file mode 100644
index 0000000..629e0cf
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.CountingIterator;
+
+/**
+ * <p>
+ * Like {@link GenericUserPreferenceArray} but stores, conceptually, {@link BooleanPreference} objects which
+ * have no associated preference value.
+ * </p>
+ *
+ * @see BooleanPreference
+ * @see BooleanItemPreferenceArray
+ * @see GenericUserPreferenceArray
+ */
+public final class BooleanUserPreferenceArray implements PreferenceArray {
+
+ private final long[] ids;
+ private long id;
+
+ public BooleanUserPreferenceArray(int size) {
+ this.ids = new long[size];
+ this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
+ }
+
+ public BooleanUserPreferenceArray(List<? extends Preference> prefs) {
+ this(prefs.size());
+ int size = prefs.size();
+ for (int i = 0; i < size; i++) {
+ Preference pref = prefs.get(i);
+ ids[i] = pref.getItemID();
+ }
+ if (size > 0) {
+ id = prefs.get(0).getUserID();
+ }
+ }
+
+ /**
+ * This is a private copy constructor for clone().
+ */
+ private BooleanUserPreferenceArray(long[] ids, long id) {
+ this.ids = ids;
+ this.id = id;
+ }
+
+ @Override
+ public int length() {
+ return ids.length;
+ }
+
+ @Override
+ public Preference get(int i) {
+ return new PreferenceView(i);
+ }
+
+ @Override
+ public void set(int i, Preference pref) {
+ id = pref.getUserID();
+ ids[i] = pref.getItemID();
+ }
+
+ @Override
+ public long getUserID(int i) {
+ return id;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Note that this method will actually set the user ID for <em>all</em> preferences.
+ */
+ @Override
+ public void setUserID(int i, long userID) {
+ id = userID;
+ }
+
+ @Override
+ public long getItemID(int i) {
+ return ids[i];
+ }
+
+ @Override
+ public void setItemID(int i, long itemID) {
+ ids[i] = itemID;
+ }
+
+ /**
+ * @return all item IDs
+ */
+ @Override
+ public long[] getIDs() {
+ return ids;
+ }
+
+ @Override
+ public float getValue(int i) {
+ return 1.0f;
+ }
+
+ @Override
+ public void setValue(int i, float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void sortByUser() { }
+
+ @Override
+ public void sortByItem() {
+ Arrays.sort(ids);
+ }
+
+ @Override
+ public void sortByValue() { }
+
+ @Override
+ public void sortByValueReversed() { }
+
+ @Override
+ public boolean hasPrefWithUserID(long userID) {
+ return id == userID;
+ }
+
+ @Override
+ public boolean hasPrefWithItemID(long itemID) {
+ for (long id : ids) {
+ if (itemID == id) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public BooleanUserPreferenceArray clone() {
+ return new BooleanUserPreferenceArray(ids.clone(), id);
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof BooleanUserPreferenceArray)) {
+ return false;
+ }
+ BooleanUserPreferenceArray otherArray = (BooleanUserPreferenceArray) other;
+ return id == otherArray.id && Arrays.equals(ids, otherArray.ids);
+ }
+
+ @Override
+ public Iterator<Preference> iterator() {
+ return Iterators.transform(new CountingIterator(length()),
+ new Function<Integer, Preference>() {
+ @Override
+ public Preference apply(Integer from) {
+ return new PreferenceView(from);
+ }
+ });
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder(10 * ids.length);
+ result.append("BooleanUserPreferenceArray[userID:");
+ result.append(id);
+ result.append(",{");
+ for (int i = 0; i < ids.length; i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(ids[i]);
+ }
+ result.append("}]");
+ return result.toString();
+ }
+
+ private final class PreferenceView implements Preference {
+
+ private final int i;
+
+ private PreferenceView(int i) {
+ this.i = i;
+ }
+
+ @Override
+ public long getUserID() {
+ return BooleanUserPreferenceArray.this.getUserID(i);
+ }
+
+ @Override
+ public long getItemID() {
+ return BooleanUserPreferenceArray.this.getItemID(i);
+ }
+
+ @Override
+ public float getValue() {
+ return 1.0f;
+ }
+
+ @Override
+ public void setValue(float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
new file mode 100644
index 0000000..2c1ff4d
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
@@ -0,0 +1,320 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple {@link DataModel} which uses given user data as its data source. This implementation
+ * is mostly useful for small experiments and is not recommended for contexts where performance is important.
+ * </p>
+ */
+public final class GenericBooleanPrefDataModel extends AbstractDataModel {
+
+ private final long[] userIDs;
+ private final FastByIDMap<FastIDSet> preferenceFromUsers;
+ private final long[] itemIDs;
+ private final FastByIDMap<FastIDSet> preferenceForItems;
+ private final FastByIDMap<FastByIDMap<Long>> timestamps;
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
+ * {@link DataModel} retains all this information in memory and is effectively immutable.
+ * </p>
+ *
+ * @param userData users to include
+ */
+ public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData) {
+ this(userData, null);
+ }
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
+ * {@link DataModel} retains all this information in memory and is effectively immutable.
+ * </p>
+ *
+ * @param userData users to include
+ * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch.
+ * User IDs are mapped to maps of item IDs to Long timestamps.
+ */
+ public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
+ Preconditions.checkArgument(userData != null, "userData is null");
+
+ this.preferenceFromUsers = userData;
+ this.preferenceForItems = new FastByIDMap<>();
+ FastIDSet itemIDSet = new FastIDSet();
+ for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) {
+ long userID = entry.getKey();
+ FastIDSet itemIDs = entry.getValue();
+ itemIDSet.addAll(itemIDs);
+ LongPrimitiveIterator it = itemIDs.iterator();
+ while (it.hasNext()) {
+ long itemID = it.nextLong();
+ FastIDSet userIDs = preferenceForItems.get(itemID);
+ if (userIDs == null) {
+ userIDs = new FastIDSet(2);
+ preferenceForItems.put(itemID, userIDs);
+ }
+ userIDs.add(userID);
+ }
+ }
+
+ this.itemIDs = itemIDSet.toArray();
+ itemIDSet = null; // Might help GC -- this is big
+ Arrays.sort(itemIDs);
+
+ this.userIDs = new long[userData.size()];
+ int i = 0;
+ LongPrimitiveIterator it = userData.keySetIterator();
+ while (it.hasNext()) {
+ userIDs[i++] = it.next();
+ }
+ Arrays.sort(userIDs);
+
+ this.timestamps = timestamps;
+ }
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given
+ * {@link DataModel}.
+ * </p>
+ *
+ * @param dataModel
+ * {@link DataModel} to copy
+ * @throws TasteException
+ * if an error occurs while retrieving the other {@link DataModel}'s users
+ * @deprecated without direct replacement.
+ * Consider {@link #toDataMap(DataModel)} with {@link #GenericBooleanPrefDataModel(FastByIDMap)}
+ */
+ @Deprecated
+ public GenericBooleanPrefDataModel(DataModel dataModel) throws TasteException {
+ this(toDataMap(dataModel));
+ }
+
+ /**
+ * Exports the simple user IDs and associated item IDs in the data model.
+ *
+ * @return a {@link FastByIDMap} mapping user IDs to {@link FastIDSet}s representing
+ * that user's associated items
+ */
+ public static FastByIDMap<FastIDSet> toDataMap(DataModel dataModel) throws TasteException {
+ FastByIDMap<FastIDSet> data = new FastByIDMap<>(dataModel.getNumUsers());
+ LongPrimitiveIterator it = dataModel.getUserIDs();
+ while (it.hasNext()) {
+ long userID = it.nextLong();
+ data.put(userID, dataModel.getItemIDsFromUser(userID));
+ }
+ return data;
+ }
+
+ public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<PreferenceArray> data) {
+ for (Map.Entry<Long,Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) {
+ PreferenceArray prefArray = (PreferenceArray) entry.getValue();
+ int size = prefArray.length();
+ FastIDSet itemIDs = new FastIDSet(size);
+ for (int i = 0; i < size; i++) {
+ itemIDs.add(prefArray.getItemID(i));
+ }
+ entry.setValue(itemIDs);
+ }
+ return (FastByIDMap<FastIDSet>) (FastByIDMap<?>) data;
+ }
+
+ /**
+ * This is used mostly internally to the framework, and shouldn't be relied upon otherwise.
+ */
+ public FastByIDMap<FastIDSet> getRawUserData() {
+ return this.preferenceFromUsers;
+ }
+
+ /**
+ * This is used mostly internally to the framework, and shouldn't be relied upon otherwise.
+ */
+ public FastByIDMap<FastIDSet> getRawItemData() {
+ return this.preferenceForItems;
+ }
+
+ @Override
+ public LongPrimitiveArrayIterator getUserIDs() {
+ return new LongPrimitiveArrayIterator(userIDs);
+ }
+
+ /**
+ * @throws NoSuchUserException
+ * if there is no such user
+ */
+ @Override
+ public PreferenceArray getPreferencesFromUser(long userID) throws NoSuchUserException {
+ FastIDSet itemIDs = preferenceFromUsers.get(userID);
+ if (itemIDs == null) {
+ throw new NoSuchUserException(userID);
+ }
+ PreferenceArray prefArray = new BooleanUserPreferenceArray(itemIDs.size());
+ int i = 0;
+ LongPrimitiveIterator it = itemIDs.iterator();
+ while (it.hasNext()) {
+ prefArray.setUserID(i, userID);
+ prefArray.setItemID(i, it.nextLong());
+ i++;
+ }
+ return prefArray;
+ }
+
+ @Override
+ public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+ FastIDSet itemIDs = preferenceFromUsers.get(userID);
+ if (itemIDs == null) {
+ throw new NoSuchUserException(userID);
+ }
+ return itemIDs;
+ }
+
+ @Override
+ public LongPrimitiveArrayIterator getItemIDs() {
+ return new LongPrimitiveArrayIterator(itemIDs);
+ }
+
+ @Override
+ public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
+ FastIDSet userIDs = preferenceForItems.get(itemID);
+ if (userIDs == null) {
+ throw new NoSuchItemException(itemID);
+ }
+ PreferenceArray prefArray = new BooleanItemPreferenceArray(userIDs.size());
+ int i = 0;
+ LongPrimitiveIterator it = userIDs.iterator();
+ while (it.hasNext()) {
+ prefArray.setUserID(i, it.nextLong());
+ prefArray.setItemID(i, itemID);
+ i++;
+ }
+ return prefArray;
+ }
+
+ @Override
+ public Float getPreferenceValue(long userID, long itemID) throws NoSuchUserException {
+ FastIDSet itemIDs = preferenceFromUsers.get(userID);
+ if (itemIDs == null) {
+ throw new NoSuchUserException(userID);
+ }
+ if (itemIDs.contains(itemID)) {
+ return 1.0f;
+ }
+ return null;
+ }
+
+ @Override
+ public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+ if (timestamps == null) {
+ return null;
+ }
+ FastByIDMap<Long> itemTimestamps = timestamps.get(userID);
+ if (itemTimestamps == null) {
+ throw new NoSuchUserException(userID);
+ }
+ return itemTimestamps.get(itemID);
+ }
+
+ @Override
+ public int getNumItems() {
+ return itemIDs.length;
+ }
+
+ @Override
+ public int getNumUsers() {
+ return userIDs.length;
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID) {
+ FastIDSet userIDs1 = preferenceForItems.get(itemID);
+ return userIDs1 == null ? 0 : userIDs1.size();
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) {
+ FastIDSet userIDs1 = preferenceForItems.get(itemID1);
+ if (userIDs1 == null) {
+ return 0;
+ }
+ FastIDSet userIDs2 = preferenceForItems.get(itemID2);
+ if (userIDs2 == null) {
+ return 0;
+ }
+ return userIDs1.size() < userIDs2.size()
+ ? userIDs2.intersectionSize(userIDs1)
+ : userIDs1.intersectionSize(userIDs2);
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ // Does nothing
+ }
+
+ @Override
+ public boolean hasPreferenceValues() {
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder(200);
+ result.append("GenericBooleanPrefDataModel[users:");
+ for (int i = 0; i < Math.min(3, userIDs.length); i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(userIDs[i]);
+ }
+ if (userIDs.length > 3) {
+ result.append("...");
+ }
+ result.append(']');
+ return result.toString();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
new file mode 100644
index 0000000..f58d349
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
@@ -0,0 +1,361 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Lists;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple {@link DataModel} which uses a given {@link List} of users as its data source. This implementation
+ * is mostly useful for small experiments and is not recommended for contexts where performance is important.
+ * </p>
+ */
+public final class GenericDataModel extends AbstractDataModel {
+
+ private static final Logger log = LoggerFactory.getLogger(GenericDataModel.class);
+
+ private final long[] userIDs;
+ private final FastByIDMap<PreferenceArray> preferenceFromUsers;
+ private final long[] itemIDs;
+ private final FastByIDMap<PreferenceArray> preferenceForItems;
+ private final FastByIDMap<FastByIDMap<Long>> timestamps;
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
+ * {@link DataModel} retains all this information in memory and is effectively immutable.
+ * </p>
+ *
+ * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)})
+ */
+ public GenericDataModel(FastByIDMap<PreferenceArray> userData) {
+ this(userData, null);
+ }
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
+ * {@link DataModel} retains all this information in memory and is effectively immutable.
+ * </p>
+ *
+ * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)})
+ * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch.
+ * User IDs are mapped to maps of item IDs to Long timestamps.
+ */
+ public GenericDataModel(FastByIDMap<PreferenceArray> userData, FastByIDMap<FastByIDMap<Long>> timestamps) {
+ Preconditions.checkArgument(userData != null, "userData is null");
+
+ this.preferenceFromUsers = userData;
+ FastByIDMap<Collection<Preference>> prefsForItems = new FastByIDMap<>();
+ FastIDSet itemIDSet = new FastIDSet();
+ int currentCount = 0;
+ float maxPrefValue = Float.NEGATIVE_INFINITY;
+ float minPrefValue = Float.POSITIVE_INFINITY;
+ for (Map.Entry<Long, PreferenceArray> entry : preferenceFromUsers.entrySet()) {
+ PreferenceArray prefs = entry.getValue();
+ prefs.sortByItem();
+ for (Preference preference : prefs) {
+ long itemID = preference.getItemID();
+ itemIDSet.add(itemID);
+ Collection<Preference> prefsForItem = prefsForItems.get(itemID);
+ if (prefsForItem == null) {
+ prefsForItem = Lists.newArrayListWithCapacity(2);
+ prefsForItems.put(itemID, prefsForItem);
+ }
+ prefsForItem.add(preference);
+ float value = preference.getValue();
+ if (value > maxPrefValue) {
+ maxPrefValue = value;
+ }
+ if (value < minPrefValue) {
+ minPrefValue = value;
+ }
+ }
+ if (++currentCount % 10000 == 0) {
+ log.info("Processed {} users", currentCount);
+ }
+ }
+ log.info("Processed {} users", currentCount);
+
+ setMinPreference(minPrefValue);
+ setMaxPreference(maxPrefValue);
+
+ this.itemIDs = itemIDSet.toArray();
+ itemIDSet = null; // Might help GC -- this is big
+ Arrays.sort(itemIDs);
+
+ this.preferenceForItems = toDataMap(prefsForItems, false);
+
+ for (Map.Entry<Long, PreferenceArray> entry : preferenceForItems.entrySet()) {
+ entry.getValue().sortByUser();
+ }
+
+ this.userIDs = new long[userData.size()];
+ int i = 0;
+ LongPrimitiveIterator it = userData.keySetIterator();
+ while (it.hasNext()) {
+ userIDs[i++] = it.next();
+ }
+ Arrays.sort(userIDs);
+
+ this.timestamps = timestamps;
+ }
+
+ /**
+ * <p>
+ * Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given
+ * {@link DataModel}.
+ * </p>
+ *
+ * @param dataModel {@link DataModel} to copy
+ * @throws TasteException if an error occurs while retrieving the other {@link DataModel}'s users
+ * @deprecated without direct replacement.
+ * Consider {@link #toDataMap(DataModel)} with {@link #GenericDataModel(FastByIDMap)}
+ */
+ @Deprecated
+ public GenericDataModel(DataModel dataModel) throws TasteException {
+ this(toDataMap(dataModel));
+ }
+
+ /**
+ * Swaps, in-place, {@link List}s for arrays in {@link Map} values .
+ *
+ * @return input value
+ */
+ public static FastByIDMap<PreferenceArray> toDataMap(FastByIDMap<Collection<Preference>> data,
+ boolean byUser) {
+ for (Map.Entry<Long,Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) {
+ List<Preference> prefList = (List<Preference>) entry.getValue();
+ entry.setValue(byUser ? new GenericUserPreferenceArray(prefList) : new GenericItemPreferenceArray(
+ prefList));
+ }
+ return (FastByIDMap<PreferenceArray>) (FastByIDMap<?>) data;
+ }
+
+ /**
+ * Exports the simple user IDs and preferences in the data model.
+ *
+ * @return a {@link FastByIDMap} mapping user IDs to {@link PreferenceArray}s representing
+ * that user's preferences
+ */
+ public static FastByIDMap<PreferenceArray> toDataMap(DataModel dataModel) throws TasteException {
+ FastByIDMap<PreferenceArray> data = new FastByIDMap<>(dataModel.getNumUsers());
+ LongPrimitiveIterator it = dataModel.getUserIDs();
+ while (it.hasNext()) {
+ long userID = it.nextLong();
+ data.put(userID, dataModel.getPreferencesFromUser(userID));
+ }
+ return data;
+ }
+
+ /**
+ * This is used mostly internally to the framework, and shouldn't be relied upon otherwise.
+ */
+ public FastByIDMap<PreferenceArray> getRawUserData() {
+ return this.preferenceFromUsers;
+ }
+
+ /**
+ * This is used mostly internally to the framework, and shouldn't be relied upon otherwise.
+ */
+ public FastByIDMap<PreferenceArray> getRawItemData() {
+ return this.preferenceForItems;
+ }
+
+ @Override
+ public LongPrimitiveArrayIterator getUserIDs() {
+ return new LongPrimitiveArrayIterator(userIDs);
+ }
+
+ /**
+ * @throws NoSuchUserException
+ * if there is no such user
+ */
+ @Override
+ public PreferenceArray getPreferencesFromUser(long userID) throws NoSuchUserException {
+ PreferenceArray prefs = preferenceFromUsers.get(userID);
+ if (prefs == null) {
+ throw new NoSuchUserException(userID);
+ }
+ return prefs;
+ }
+
+ @Override
+ public FastIDSet getItemIDsFromUser(long userID) throws TasteException {
+ PreferenceArray prefs = getPreferencesFromUser(userID);
+ int size = prefs.length();
+ FastIDSet result = new FastIDSet(size);
+ for (int i = 0; i < size; i++) {
+ result.add(prefs.getItemID(i));
+ }
+ return result;
+ }
+
+ @Override
+ public LongPrimitiveArrayIterator getItemIDs() {
+ return new LongPrimitiveArrayIterator(itemIDs);
+ }
+
+ @Override
+ public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException {
+ PreferenceArray prefs = preferenceForItems.get(itemID);
+ if (prefs == null) {
+ throw new NoSuchItemException(itemID);
+ }
+ return prefs;
+ }
+
+ @Override
+ public Float getPreferenceValue(long userID, long itemID) throws TasteException {
+ PreferenceArray prefs = getPreferencesFromUser(userID);
+ int size = prefs.length();
+ for (int i = 0; i < size; i++) {
+ if (prefs.getItemID(i) == itemID) {
+ return prefs.getValue(i);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Long getPreferenceTime(long userID, long itemID) throws TasteException {
+ if (timestamps == null) {
+ return null;
+ }
+ FastByIDMap<Long> itemTimestamps = timestamps.get(userID);
+ if (itemTimestamps == null) {
+ throw new NoSuchUserException(userID);
+ }
+ return itemTimestamps.get(itemID);
+ }
+
+ @Override
+ public int getNumItems() {
+ return itemIDs.length;
+ }
+
+ @Override
+ public int getNumUsers() {
+ return userIDs.length;
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID) {
+ PreferenceArray prefs1 = preferenceForItems.get(itemID);
+ return prefs1 == null ? 0 : prefs1.length();
+ }
+
+ @Override
+ public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) {
+ PreferenceArray prefs1 = preferenceForItems.get(itemID1);
+ if (prefs1 == null) {
+ return 0;
+ }
+ PreferenceArray prefs2 = preferenceForItems.get(itemID2);
+ if (prefs2 == null) {
+ return 0;
+ }
+
+ int size1 = prefs1.length();
+ int size2 = prefs2.length();
+ int count = 0;
+ int i = 0;
+ int j = 0;
+ long userID1 = prefs1.getUserID(0);
+ long userID2 = prefs2.getUserID(0);
+ while (true) {
+ if (userID1 < userID2) {
+ if (++i == size1) {
+ break;
+ }
+ userID1 = prefs1.getUserID(i);
+ } else if (userID1 > userID2) {
+ if (++j == size2) {
+ break;
+ }
+ userID2 = prefs2.getUserID(j);
+ } else {
+ count++;
+ if (++i == size1 || ++j == size2) {
+ break;
+ }
+ userID1 = prefs1.getUserID(i);
+ userID2 = prefs2.getUserID(j);
+ }
+ }
+ return count;
+ }
+
+ @Override
+ public void removePreference(long userID, long itemID) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setPreference(long userID, long itemID, float value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ // Does nothing
+ }
+
+ @Override
+ public boolean hasPreferenceValues() {
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder(200);
+ result.append("GenericDataModel[users:");
+ for (int i = 0; i < Math.min(3, userIDs.length); i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(userIDs[i]);
+ }
+ if (userIDs.length > 3) {
+ result.append("...");
+ }
+ result.append(']');
+ return result.toString();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java
new file mode 100644
index 0000000..fde9314
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java
@@ -0,0 +1,301 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.CountingIterator;
+
+/**
+ * <p>
+ * Like {@link GenericUserPreferenceArray} but stores preferences for one item (all item IDs the same) rather
+ * than one user.
+ * </p>
+ *
+ * @see BooleanItemPreferenceArray
+ * @see GenericUserPreferenceArray
+ * @see GenericPreference
+ */
+public final class GenericItemPreferenceArray implements PreferenceArray {
+
+ private static final int USER = 0;
+ private static final int VALUE = 2;
+ private static final int VALUE_REVERSED = 3;
+
+ private final long[] ids;
+ private long id;
+ private final float[] values;
+
+ public GenericItemPreferenceArray(int size) {
+ this.ids = new long[size];
+ values = new float[size];
+ this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
+ }
+
+ public GenericItemPreferenceArray(List<? extends Preference> prefs) {
+ this(prefs.size());
+ int size = prefs.size();
+ long itemID = Long.MIN_VALUE;
+ for (int i = 0; i < size; i++) {
+ Preference pref = prefs.get(i);
+ ids[i] = pref.getUserID();
+ if (i == 0) {
+ itemID = pref.getItemID();
+ } else {
+ if (itemID != pref.getItemID()) {
+ throw new IllegalArgumentException("Not all item IDs are the same");
+ }
+ }
+ values[i] = pref.getValue();
+ }
+ id = itemID;
+ }
+
+ /**
+ * This is a private copy constructor for clone().
+ */
+ private GenericItemPreferenceArray(long[] ids, long id, float[] values) {
+ this.ids = ids;
+ this.id = id;
+ this.values = values;
+ }
+
+ @Override
+ public int length() {
+ return ids.length;
+ }
+
+ @Override
+ public Preference get(int i) {
+ return new PreferenceView(i);
+ }
+
+ @Override
+ public void set(int i, Preference pref) {
+ id = pref.getItemID();
+ ids[i] = pref.getUserID();
+ values[i] = pref.getValue();
+ }
+
+ @Override
+ public long getUserID(int i) {
+ return ids[i];
+ }
+
+ @Override
+ public void setUserID(int i, long userID) {
+ ids[i] = userID;
+ }
+
+ @Override
+ public long getItemID(int i) {
+ return id;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Note that this method will actually set the item ID for <em>all</em> preferences.
+ */
+ @Override
+ public void setItemID(int i, long itemID) {
+ id = itemID;
+ }
+
+ /**
+ * @return all user IDs
+ */
+ @Override
+ public long[] getIDs() {
+ return ids;
+ }
+
+ @Override
+ public float getValue(int i) {
+ return values[i];
+ }
+
+ @Override
+ public void setValue(int i, float value) {
+ values[i] = value;
+ }
+
+ @Override
+ public void sortByUser() {
+ lateralSort(USER);
+ }
+
+ @Override
+ public void sortByItem() { }
+
+ @Override
+ public void sortByValue() {
+ lateralSort(VALUE);
+ }
+
+ @Override
+ public void sortByValueReversed() {
+ lateralSort(VALUE_REVERSED);
+ }
+
+ @Override
+ public boolean hasPrefWithUserID(long userID) {
+ for (long id : ids) {
+ if (userID == id) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean hasPrefWithItemID(long itemID) {
+ return id == itemID;
+ }
+
+ private void lateralSort(int type) {
+ //Comb sort: http://en.wikipedia.org/wiki/Comb_sort
+ int length = length();
+ int gap = length;
+ boolean swapped = false;
+ while (gap > 1 || swapped) {
+ if (gap > 1) {
+ gap /= 1.247330950103979; // = 1 / (1 - 1/e^phi)
+ }
+ swapped = false;
+ int max = length - gap;
+ for (int i = 0; i < max; i++) {
+ int other = i + gap;
+ if (isLess(other, i, type)) {
+ swap(i, other);
+ swapped = true;
+ }
+ }
+ }
+ }
+
+ private boolean isLess(int i, int j, int type) {
+ switch (type) {
+ case USER:
+ return ids[i] < ids[j];
+ case VALUE:
+ return values[i] < values[j];
+ case VALUE_REVERSED:
+ return values[i] > values[j];
+ default:
+ throw new IllegalStateException();
+ }
+ }
+
+ private void swap(int i, int j) {
+ long temp1 = ids[i];
+ float temp2 = values[i];
+ ids[i] = ids[j];
+ values[i] = values[j];
+ ids[j] = temp1;
+ values[j] = temp2;
+ }
+
+ @Override
+ public GenericItemPreferenceArray clone() {
+ return new GenericItemPreferenceArray(ids.clone(), id, values.clone());
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids) ^ Arrays.hashCode(values);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof GenericItemPreferenceArray)) {
+ return false;
+ }
+ GenericItemPreferenceArray otherArray = (GenericItemPreferenceArray) other;
+ return id == otherArray.id && Arrays.equals(ids, otherArray.ids) && Arrays.equals(values, otherArray.values);
+ }
+
+ @Override
+ public Iterator<Preference> iterator() {
+ return Iterators.transform(new CountingIterator(length()),
+ new Function<Integer, Preference>() {
+ @Override
+ public Preference apply(Integer from) {
+ return new PreferenceView(from);
+ }
+ });
+ }
+
+ @Override
+ public String toString() {
+ if (ids == null || ids.length == 0) {
+ return "GenericItemPreferenceArray[{}]";
+ }
+ StringBuilder result = new StringBuilder(20 * ids.length);
+ result.append("GenericItemPreferenceArray[itemID:");
+ result.append(id);
+ result.append(",{");
+ for (int i = 0; i < ids.length; i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(ids[i]);
+ result.append('=');
+ result.append(values[i]);
+ }
+ result.append("}]");
+ return result.toString();
+ }
+
+ private final class PreferenceView implements Preference {
+
+ private final int i;
+
+ private PreferenceView(int i) {
+ this.i = i;
+ }
+
+ @Override
+ public long getUserID() {
+ return GenericItemPreferenceArray.this.getUserID(i);
+ }
+
+ @Override
+ public long getItemID() {
+ return GenericItemPreferenceArray.this.getItemID(i);
+ }
+
+ @Override
+ public float getValue() {
+ return values[i];
+ }
+
+ @Override
+ public void setValue(float value) {
+ values[i] = value;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java
new file mode 100644
index 0000000..e6c7f43
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.io.Serializable;
+
+import org.apache.mahout.cf.taste.model.Preference;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * A simple {@link Preference} encapsulating an item and preference value.
+ * </p>
+ */
+public class GenericPreference implements Preference, Serializable {
+
+ private final long userID;
+ private final long itemID;
+ private float value;
+
+ public GenericPreference(long userID, long itemID, float value) {
+ Preconditions.checkArgument(!Float.isNaN(value), "NaN value");
+ this.userID = userID;
+ this.itemID = itemID;
+ this.value = value;
+ }
+
+ @Override
+ public long getUserID() {
+ return userID;
+ }
+
+ @Override
+ public long getItemID() {
+ return itemID;
+ }
+
+ @Override
+ public float getValue() {
+ return value;
+ }
+
+ @Override
+ public void setValue(float value) {
+ Preconditions.checkArgument(!Float.isNaN(value), "NaN value");
+ this.value = value;
+ }
+
+ @Override
+ public String toString() {
+ return "GenericPreference[userID: " + userID + ", itemID:" + itemID + ", value:" + value + ']';
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java
new file mode 100644
index 0000000..647feeb
--- /dev/null
+++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.iterator.CountingIterator;
+
+/**
+ * <p>
+ * Like {@link GenericItemPreferenceArray} but stores preferences for one user (all user IDs the same) rather
+ * than one item.
+ * </p>
+ *
+ * <p>
+ * This implementation maintains two parallel arrays, of item IDs and values. The idea is to save allocating
+ * {@link Preference} objects themselves. This saves the overhead of {@link Preference} objects but also
+ * duplicating the user ID value.
+ * </p>
+ *
+ * @see BooleanUserPreferenceArray
+ * @see GenericItemPreferenceArray
+ * @see GenericPreference
+ */
+public final class GenericUserPreferenceArray implements PreferenceArray {
+
+ private static final int ITEM = 1;
+ private static final int VALUE = 2;
+ private static final int VALUE_REVERSED = 3;
+
+ private final long[] ids;
+ private long id;
+ private final float[] values;
+
+ public GenericUserPreferenceArray(int size) {
+ this.ids = new long[size];
+ values = new float[size];
+ this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
+ }
+
+ public GenericUserPreferenceArray(List<? extends Preference> prefs) {
+ this(prefs.size());
+ int size = prefs.size();
+ long userID = Long.MIN_VALUE;
+ for (int i = 0; i < size; i++) {
+ Preference pref = prefs.get(i);
+ if (i == 0) {
+ userID = pref.getUserID();
+ } else {
+ if (userID != pref.getUserID()) {
+ throw new IllegalArgumentException("Not all user IDs are the same");
+ }
+ }
+ ids[i] = pref.getItemID();
+ values[i] = pref.getValue();
+ }
+ id = userID;
+ }
+
+ /**
+ * This is a private copy constructor for clone().
+ */
+ private GenericUserPreferenceArray(long[] ids, long id, float[] values) {
+ this.ids = ids;
+ this.id = id;
+ this.values = values;
+ }
+
+ @Override
+ public int length() {
+ return ids.length;
+ }
+
+ @Override
+ public Preference get(int i) {
+ return new PreferenceView(i);
+ }
+
+ @Override
+ public void set(int i, Preference pref) {
+ id = pref.getUserID();
+ ids[i] = pref.getItemID();
+ values[i] = pref.getValue();
+ }
+
+ @Override
+ public long getUserID(int i) {
+ return id;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Note that this method will actually set the user ID for <em>all</em> preferences.
+ */
+ @Override
+ public void setUserID(int i, long userID) {
+ id = userID;
+ }
+
+ @Override
+ public long getItemID(int i) {
+ return ids[i];
+ }
+
+ @Override
+ public void setItemID(int i, long itemID) {
+ ids[i] = itemID;
+ }
+
+ /**
+ * @return all item IDs
+ */
+ @Override
+ public long[] getIDs() {
+ return ids;
+ }
+
+ @Override
+ public float getValue(int i) {
+ return values[i];
+ }
+
+ @Override
+ public void setValue(int i, float value) {
+ values[i] = value;
+ }
+
+ @Override
+ public void sortByUser() { }
+
+ @Override
+ public void sortByItem() {
+ lateralSort(ITEM);
+ }
+
+ @Override
+ public void sortByValue() {
+ lateralSort(VALUE);
+ }
+
+ @Override
+ public void sortByValueReversed() {
+ lateralSort(VALUE_REVERSED);
+ }
+
+ @Override
+ public boolean hasPrefWithUserID(long userID) {
+ return id == userID;
+ }
+
+ @Override
+ public boolean hasPrefWithItemID(long itemID) {
+ for (long id : ids) {
+ if (itemID == id) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private void lateralSort(int type) {
+ //Comb sort: http://en.wikipedia.org/wiki/Comb_sort
+ int length = length();
+ int gap = length;
+ boolean swapped = false;
+ while (gap > 1 || swapped) {
+ if (gap > 1) {
+ gap /= 1.247330950103979; // = 1 / (1 - 1/e^phi)
+ }
+ swapped = false;
+ int max = length - gap;
+ for (int i = 0; i < max; i++) {
+ int other = i + gap;
+ if (isLess(other, i, type)) {
+ swap(i, other);
+ swapped = true;
+ }
+ }
+ }
+ }
+
+ private boolean isLess(int i, int j, int type) {
+ switch (type) {
+ case ITEM:
+ return ids[i] < ids[j];
+ case VALUE:
+ return values[i] < values[j];
+ case VALUE_REVERSED:
+ return values[i] > values[j];
+ default:
+ throw new IllegalStateException();
+ }
+ }
+
+ private void swap(int i, int j) {
+ long temp1 = ids[i];
+ float temp2 = values[i];
+ ids[i] = ids[j];
+ values[i] = values[j];
+ ids[j] = temp1;
+ values[j] = temp2;
+ }
+
+ @Override
+ public GenericUserPreferenceArray clone() {
+ return new GenericUserPreferenceArray(ids.clone(), id, values.clone());
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids) ^ Arrays.hashCode(values);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof GenericUserPreferenceArray)) {
+ return false;
+ }
+ GenericUserPreferenceArray otherArray = (GenericUserPreferenceArray) other;
+ return id == otherArray.id && Arrays.equals(ids, otherArray.ids) && Arrays.equals(values, otherArray.values);
+ }
+
+ @Override
+ public Iterator<Preference> iterator() {
+ return Iterators.transform(new CountingIterator(length()),
+ new Function<Integer, Preference>() {
+ @Override
+ public Preference apply(Integer from) {
+ return new PreferenceView(from);
+ }
+ });
+ }
+
+ @Override
+ public String toString() {
+ if (ids == null || ids.length == 0) {
+ return "GenericUserPreferenceArray[{}]";
+ }
+ StringBuilder result = new StringBuilder(20 * ids.length);
+ result.append("GenericUserPreferenceArray[userID:");
+ result.append(id);
+ result.append(",{");
+ for (int i = 0; i < ids.length; i++) {
+ if (i > 0) {
+ result.append(',');
+ }
+ result.append(ids[i]);
+ result.append('=');
+ result.append(values[i]);
+ }
+ result.append("}]");
+ return result.toString();
+ }
+
+ private final class PreferenceView implements Preference {
+
+ private final int i;
+
+ private PreferenceView(int i) {
+ this.i = i;
+ }
+
+ @Override
+ public long getUserID() {
+ return GenericUserPreferenceArray.this.getUserID(i);
+ }
+
+ @Override
+ public long getItemID() {
+ return GenericUserPreferenceArray.this.getItemID(i);
+ }
+
+ @Override
+ public float getValue() {
+ return values[i];
+ }
+
+ @Override
+ public void setValue(float value) {
+ values[i] = value;
+ }
+
+ }
+
+}