You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/31 00:06:09 UTC
svn commit: r661892 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/impl/correlation/
test/java/org/apache/mahout/cf/taste/impl/
test/java/org/apache/mahout/cf/taste/impl/correlation/
Author: srowen
Date: Fri May 30 15:06:08 2008
New Revision: 661892
URL: http://svn.apache.org/viewvc?rev=661892&view=rev
Log:
Added EuclideanDistanceCorrelation, TanimotoCoefficientCorrelation, plus tests, having been inspired by the book "Collective Intelligence"
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java
- copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java
- copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java
- copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java
- copied, changed from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java
- copied, changed from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java Fri May 30 15:06:08 2008
@@ -17,46 +17,27 @@
package org.apache.mahout.cf.taste.impl.correlation;
-import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
+import org.apache.mahout.cf.taste.common.TasteException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * Abstract superclass encapsulating functionality that is common to most
+ * implementations in this package, including the basic correlation algorithm,
+ * normalization, transforms, etc.
*/
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+abstract class AbstractCorrelation implements UserCorrelation, ItemCorrelation {
- private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
+ private static final Logger log = LoggerFactory.getLogger(AbstractCorrelation.class);
private final DataModel dataModel;
private PreferenceInferrer inferrer;
@@ -65,21 +46,21 @@
private boolean weighted;
/**
- * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
+ * <p>Creates a normal (unweighted) correlation; called by concrete subclasses such as {@link org.apache.mahout.cf.taste.impl.correlation.PearsonCorrelation}.</p>
*
* @param dataModel
*/
- public PearsonCorrelation(DataModel dataModel) {
+ public AbstractCorrelation(DataModel dataModel) {
this(dataModel, false);
}
/**
- * <p>Creates a weighted {@link PearsonCorrelation}.</p>
+ * <p>Creates a weighted correlation; called by concrete subclasses such as {@link org.apache.mahout.cf.taste.impl.correlation.PearsonCorrelation}.</p>
*
* @param dataModel
* @param weighted
*/
- public PearsonCorrelation(DataModel dataModel, boolean weighted) {
+ public AbstractCorrelation(DataModel dataModel, boolean weighted) {
if (dataModel == null) {
throw new IllegalArgumentException("dataModel is null");
}
@@ -87,74 +68,60 @@
this.weighted = weighted;
}
- /**
- * <p>Several subclasses in this package implement this method to actually compute the correlation
- * from figures computed over users or items. Note that the computations in this class "center" the
- * data, such that X and Y's mean are 0.</p>
- *
- * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
- * the standard correlation computations as a result.</p>
- *
- * @param n total number of users or items
- * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
- * both users/items
- * @param sumX2 sum of the square of user/item preference values, over the first item/user
- * @param sumY2 sum of the square of the user/item preference values, over the second item/user
- * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
- * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
- */
- private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
- if (n == 0) {
- return Double.NaN;
- }
- // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
- // the data is assumed to be centered.
- double xTerm = Math.sqrt(sumX2);
- double yTerm = Math.sqrt(sumY2);
- double denominator = xTerm * yTerm;
- if (denominator == 0.0) {
- // One or both parties has -all- the same ratings;
- // can't really say much correlation under this measure
- return Double.NaN;
- }
- return sumXY / denominator;
- }
-
- DataModel getDataModel() {
+ final DataModel getDataModel() {
return dataModel;
}
- PreferenceInferrer getPreferenceInferrer() {
+ final PreferenceInferrer getPreferenceInferrer() {
return inferrer;
}
- public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+ public final void setPreferenceInferrer(PreferenceInferrer inferrer) {
if (inferrer == null) {
throw new IllegalArgumentException("inferrer is null");
}
this.inferrer = inferrer;
}
- public PreferenceTransform2 getPrefTransform() {
+ public final PreferenceTransform2 getPrefTransform() {
return prefTransform;
}
- public void setPrefTransform(PreferenceTransform2 prefTransform) {
+ public final void setPrefTransform(PreferenceTransform2 prefTransform) {
this.prefTransform = prefTransform;
}
- public CorrelationTransform<?> getCorrelationTransform() {
+ public final CorrelationTransform<Object> getCorrelationTransform() {
return correlationTransform;
}
- public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
+ public final void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
this.correlationTransform = correlationTransform;
}
- boolean isWeighted() {
+ final boolean isWeighted() {
return weighted;
}
+ /**
+ * <p>Several subclasses in this package implement this method to actually compute the correlation
+ * from figures computed over users or items. Note that the computations in this class "center" the
+ * data, such that X and Y's mean are 0.</p>
+ *
+ * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
+ * the standard correlation computations as a result.</p>
+ *
+ * @param n total number of users or items
+ * @param sumXY sum of product of user/item preference values, over all items/users preferred by
+ * both users/items
+ * @param sumX2 sum of the square of user/item preference values, over the first item/user
+ * @param sumY2 sum of the square of the user/item preference values, over the second item/user
+ * @param sumXYdiff2 sum of squares of differences in X and Y values
+ * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
+ * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s)
+ */
+ abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
+
public double userCorrelation(User user1, User user2) throws TasteException {
if (user1 == null || user2 == null) {
@@ -180,6 +147,7 @@
double sumY = 0.0;
double sumY2 = 0.0;
double sumXY = 0.0;
+ double sumXYdiff2 = 0.0;
int count = 0;
boolean hasInferrer = inferrer != null;
@@ -226,6 +194,8 @@
sumX2 += x * x;
sumY += y;
sumY2 += y * y;
+ double diff = x - y;
+ sumXYdiff2 += diff * diff;
count++;
}
if (compare <= 0) {
@@ -252,7 +222,7 @@
double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
+ double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
if (correlationTransform != null) {
result = correlationTransform.transformCorrelation(user1, user2, result);
@@ -268,7 +238,7 @@
return result;
}
- public double itemCorrelation(Item item1, Item item2) throws TasteException {
+ public final double itemCorrelation(Item item1, Item item2) throws TasteException {
if (item1 == null || item2 == null) {
throw new IllegalArgumentException("item1 or item2 is null");
@@ -293,6 +263,7 @@
double sumY = 0.0;
double sumY2 = 0.0;
double sumXY = 0.0;
+ double sumXYdiff2 = 0.0;
int count = 0;
// No, pref inferrers and transforms don't apply here. I think.
@@ -308,6 +279,8 @@
sumX2 += x * x;
sumY += y;
sumY2 += y * y;
+ double diff = x - y;
+ sumXYdiff2 += diff * diff;
count++;
}
if (compare <= 0) {
@@ -334,7 +307,7 @@
double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
+ double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
if (correlationTransform != null) {
result = correlationTransform.transformCorrelation(item1, item2, result);
@@ -345,12 +318,12 @@
}
if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
+ log.trace("ItemCorrelation between " + item1 + " and " + item2 + " is " + result);
}
return result;
}
- private double normalizeWeightResult(double result, int count, int num) {
+ final double normalizeWeightResult(double result, int count, int num) {
if (weighted) {
double scaleFactor = 1.0 - (double) count / (double) (num + 1);
if (result < 0.0) {
@@ -368,7 +341,7 @@
return result;
}
- public void refresh() {
+ public final void refresh() {
dataModel.refresh();
if (inferrer != null) {
inferrer.refresh();
@@ -382,8 +355,8 @@
}
@Override
- public String toString() {
- return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+ public final String toString() {
+ return this.getClass().getSimpleName() + "[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
}
-}
+}
\ No newline at end of file
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java Fri May 30 15:06:08 2008
@@ -17,373 +17,39 @@
package org.apache.mahout.cf.taste.impl.correlation;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * <p>An implementation of a "correlation" based on the Euclidean "distance" between two
+ * {@link org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions
+ * and preferences as points along those dimensions, a distance is computed using all
+ * items (dimensions) where both users have expressed a preference for that item. This
+ * is simply the square root of the sum of the squares of differences in position (preference)
+ * along each dimension. The correlation is then computed as 1 / (1 + distance), so the
+ * resulting values are in the range (0,1].</p>
*/
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class EuclideanDistanceCorrelation extends AbstractCorrelation {
- private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
-
- private final DataModel dataModel;
- private PreferenceInferrer inferrer;
- private PreferenceTransform2 prefTransform;
- private CorrelationTransform<Object> correlationTransform;
- private boolean weighted;
-
- /**
- * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- */
- public PearsonCorrelation(DataModel dataModel) {
- this(dataModel, false);
+ public EuclideanDistanceCorrelation(DataModel dataModel) {
+ super(dataModel);
}
- /**
- * <p>Creates a weighted {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- * @param weighted
- */
- public PearsonCorrelation(DataModel dataModel, boolean weighted) {
- if (dataModel == null) {
- throw new IllegalArgumentException("dataModel is null");
- }
- this.dataModel = dataModel;
- this.weighted = weighted;
+ public EuclideanDistanceCorrelation(DataModel dataModel, boolean weighted) {
+ super(dataModel, weighted);
}
- /**
- * <p>Several subclasses in this package implement this method to actually compute the correlation
- * from figures computed over users or items. Note that the computations in this class "center" the
- * data, such that X and Y's mean are 0.</p>
- *
- * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
- * the standard correlation computations as a result.</p>
- *
- * @param n total number of users or items
- * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
- * both users/items
- * @param sumX2 sum of the square of user/item preference values, over the first item/user
- * @param sumY2 sum of the square of the user/item preference values, over the second item/user
- * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
- * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
- */
- private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
+ double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
if (n == 0) {
return Double.NaN;
}
- // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
- // the data is assumed to be centered.
- double xTerm = Math.sqrt(sumX2);
- double yTerm = Math.sqrt(sumY2);
- double denominator = xTerm * yTerm;
+ double denominator = Math.sqrt(sumX2) + Math.sqrt(sumY2);
if (denominator == 0.0) {
- // One or both parties has -all- the same ratings;
- // can't really say much correlation under this measure
return Double.NaN;
}
- return sumXY / denominator;
- }
-
- DataModel getDataModel() {
- return dataModel;
- }
-
- PreferenceInferrer getPreferenceInferrer() {
- return inferrer;
- }
-
- public void setPreferenceInferrer(PreferenceInferrer inferrer) {
- if (inferrer == null) {
- throw new IllegalArgumentException("inferrer is null");
- }
- this.inferrer = inferrer;
- }
-
- public PreferenceTransform2 getPrefTransform() {
- return prefTransform;
- }
-
- public void setPrefTransform(PreferenceTransform2 prefTransform) {
- this.prefTransform = prefTransform;
- }
-
- public CorrelationTransform<?> getCorrelationTransform() {
- return correlationTransform;
- }
-
- public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
- this.correlationTransform = correlationTransform;
- }
-
- boolean isWeighted() {
- return weighted;
- }
-
- public double userCorrelation(User user1, User user2) throws TasteException {
-
- if (user1 == null || user2 == null) {
- throw new IllegalArgumentException("user1 or user2 is null");
- }
-
- Preference[] xPrefs = user1.getPreferencesAsArray();
- Preference[] yPrefs = user2.getPreferencesAsArray();
-
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return Double.NaN;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- Item xIndex = xPref.getItem();
- Item yIndex = yPref.getItem();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- boolean hasInferrer = inferrer != null;
- boolean hasPrefTransform = prefTransform != null;
-
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (hasInferrer || compare == 0) {
- double x;
- double y;
- if (compare == 0) {
- // Both users expressed a preference for the item
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- y = prefTransform.getTransformedValue(yPref);
- } else {
- x = xPref.getValue();
- y = yPref.getValue();
- }
- } else {
- // Only one user expressed a preference, but infer the other one's preference and tally
- // as if the other user expressed that preference
- if (compare < 0) {
- // X has a value; infer Y's
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- } else {
- x = xPref.getValue();
- }
- y = inferrer.inferPreference(user2, xIndex);
- } else {
- // compare > 0
- // Y has a value; infer X's
- x = inferrer.inferPreference(user1, yIndex);
- if (hasPrefTransform) {
- y = prefTransform.getTransformedValue(yPref);
- } else {
- y = yPref.getValue();
- }
- }
- }
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getItem();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getItem();
- }
- }
-
- // "Center" the data. If my math is correct, this'll do it.
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(user1, user2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumItems());
- }
-
- if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
- }
- return result;
- }
-
- public double itemCorrelation(Item item1, Item item2) throws TasteException {
-
- if (item1 == null || item2 == null) {
- throw new IllegalArgumentException("item1 or item2 is null");
- }
-
- Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
- Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return Double.NaN;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- User xIndex = xPref.getUser();
- User yIndex = yPref.getUser();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- // No, pref inferrers and transforms don't appy here. I think.
-
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (compare == 0) {
- // Both users expressed a preference for the item
- double x = xPref.getValue();
- double y = yPref.getValue();
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getUser();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getUser();
- }
- }
-
- // See comments above on these computations
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(item1, item2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumUsers());
- }
-
- if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
- }
- return result;
- }
-
- private double normalizeWeightResult(double result, int count, int num) {
- if (weighted) {
- double scaleFactor = 1.0 - (double) count / (double) (num + 1);
- if (result < 0.0) {
- result = -1.0 + scaleFactor * (1.0 + result);
- } else {
- result = 1.0 - scaleFactor * (1.0 - result);
- }
- }
- // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
- if (result < -1.0) {
- result = -1.0;
- } else if (result > 1.0) {
- result = 1.0;
- }
- return result;
- }
-
- public void refresh() {
- dataModel.refresh();
- if (inferrer != null) {
- inferrer.refresh();
- }
- if (prefTransform != null) {
- prefTransform.refresh();
- }
- if (correlationTransform != null) {
- correlationTransform.refresh();
- }
- }
-
- @Override
- public String toString() {
- return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+ // normalize a bit for magnitude
+ sumXYdiff2 /= denominator;
+ // divide by n below to not automatically give users with more overlap more correlation
+ return 1.0 / (1.0 + (Math.sqrt(sumXYdiff2) / (double) n));
}
-}
+}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java?rev=661892&r1=661891&r2=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java Fri May 30 15:06:08 2008
@@ -17,18 +17,9 @@
package org.apache.mahout.cf.taste.impl.correlation;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
@@ -54,56 +45,17 @@
* receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
* angle between the two vectors defined by the users' preference values.</p>
*/
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class PearsonCorrelation extends AbstractCorrelation {
- private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
-
- private final DataModel dataModel;
- private PreferenceInferrer inferrer;
- private PreferenceTransform2 prefTransform;
- private CorrelationTransform<Object> correlationTransform;
- private boolean weighted;
-
- /**
- * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- */
public PearsonCorrelation(DataModel dataModel) {
- this(dataModel, false);
+ super(dataModel);
}
- /**
- * <p>Creates a weighted {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- * @param weighted
- */
public PearsonCorrelation(DataModel dataModel, boolean weighted) {
- if (dataModel == null) {
- throw new IllegalArgumentException("dataModel is null");
- }
- this.dataModel = dataModel;
- this.weighted = weighted;
+ super(dataModel, weighted);
}
- /**
- * <p>Several subclasses in this package implement this method to actually compute the correlation
- * from figures computed over users or items. Note that the computations in this class "center" the
- * data, such that X and Y's mean are 0.</p>
- *
- * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
- * the standard correlation computations as a result.</p>
- *
- * @param n total number of users or items
- * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
- * both users/items
- * @param sumX2 sum of the square of user/item preference values, over the first item/user
- * @param sumY2 sum of the square of the user/item preference values, over the second item/user
- * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
- * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
- */
- private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
+ double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
if (n == 0) {
return Double.NaN;
}
@@ -120,270 +72,4 @@
return sumXY / denominator;
}
- DataModel getDataModel() {
- return dataModel;
- }
-
- PreferenceInferrer getPreferenceInferrer() {
- return inferrer;
- }
-
- public void setPreferenceInferrer(PreferenceInferrer inferrer) {
- if (inferrer == null) {
- throw new IllegalArgumentException("inferrer is null");
- }
- this.inferrer = inferrer;
- }
-
- public PreferenceTransform2 getPrefTransform() {
- return prefTransform;
- }
-
- public void setPrefTransform(PreferenceTransform2 prefTransform) {
- this.prefTransform = prefTransform;
- }
-
- public CorrelationTransform<?> getCorrelationTransform() {
- return correlationTransform;
- }
-
- public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
- this.correlationTransform = correlationTransform;
- }
-
- boolean isWeighted() {
- return weighted;
- }
-
- public double userCorrelation(User user1, User user2) throws TasteException {
-
- if (user1 == null || user2 == null) {
- throw new IllegalArgumentException("user1 or user2 is null");
- }
-
- Preference[] xPrefs = user1.getPreferencesAsArray();
- Preference[] yPrefs = user2.getPreferencesAsArray();
-
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return Double.NaN;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- Item xIndex = xPref.getItem();
- Item yIndex = yPref.getItem();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- boolean hasInferrer = inferrer != null;
- boolean hasPrefTransform = prefTransform != null;
-
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (hasInferrer || compare == 0) {
- double x;
- double y;
- if (compare == 0) {
- // Both users expressed a preference for the item
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- y = prefTransform.getTransformedValue(yPref);
- } else {
- x = xPref.getValue();
- y = yPref.getValue();
- }
- } else {
- // Only one user expressed a preference, but infer the other one's preference and tally
- // as if the other user expressed that preference
- if (compare < 0) {
- // X has a value; infer Y's
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- } else {
- x = xPref.getValue();
- }
- y = inferrer.inferPreference(user2, xIndex);
- } else {
- // compare > 0
- // Y has a value; infer X's
- x = inferrer.inferPreference(user1, yIndex);
- if (hasPrefTransform) {
- y = prefTransform.getTransformedValue(yPref);
- } else {
- y = yPref.getValue();
- }
- }
- }
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getItem();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getItem();
- }
- }
-
- // "Center" the data. If my math is correct, this'll do it.
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(user1, user2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumItems());
- }
-
- if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
- }
- return result;
- }
-
- public double itemCorrelation(Item item1, Item item2) throws TasteException {
-
- if (item1 == null || item2 == null) {
- throw new IllegalArgumentException("item1 or item2 is null");
- }
-
- Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
- Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return Double.NaN;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- User xIndex = xPref.getUser();
- User yIndex = yPref.getUser();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- // No, pref inferrers and transforms don't apply here. I think.
-
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (compare == 0) {
- // Both users expressed a preference for the item
- double x = xPref.getValue();
- double y = yPref.getValue();
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getUser();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getUser();
- }
- }
-
- // See comments above on these computations
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(item1, item2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumUsers());
- }
-
- if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
- }
- return result;
- }
-
- private double normalizeWeightResult(double result, int count, int num) {
- if (weighted) {
- double scaleFactor = 1.0 - (double) count / (double) (num + 1);
- if (result < 0.0) {
- result = -1.0 + scaleFactor * (1.0 + result);
- } else {
- result = 1.0 - scaleFactor * (1.0 - result);
- }
- }
- // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
- if (result < -1.0) {
- result = -1.0;
- } else if (result > 1.0) {
- result = 1.0;
- }
- return result;
- }
-
- public void refresh() {
- dataModel.refresh();
- if (inferrer != null) {
- inferrer.refresh();
- }
- if (prefTransform != null) {
- prefTransform.refresh();
- }
- if (correlationTransform != null) {
- correlationTransform.refresh();
- }
- }
-
- @Override
- public String toString() {
- return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
- }
-
}
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java Fri May 30 15:06:08 2008
@@ -17,145 +17,46 @@
package org.apache.mahout.cf.taste.impl.correlation;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
+ * <p>An implementation of a "correlation" based on the
+ * <a href="http://en.wikipedia.org/wiki/Jaccard_index">Tanimoto coefficient</a>, or extended
+ * Jaccard coefficient.</p>
+ *
+ * <p>This is intended for "binary" data sets where a user either expresses a generic "yes" preference
+ * for an item or has no preference. The actual preference values do not matter here, only their presence
+ * or absence.</p>
*
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * <p>The value returned is in [0,1].</p>
*/
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class TanimotoCoefficientCorrelation implements UserCorrelation, ItemCorrelation {
- private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
+ private static final Logger log = LoggerFactory.getLogger(TanimotoCoefficientCorrelation.class);
private final DataModel dataModel;
- private PreferenceInferrer inferrer;
- private PreferenceTransform2 prefTransform;
- private CorrelationTransform<Object> correlationTransform;
- private boolean weighted;
-
- /**
- * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- */
- public PearsonCorrelation(DataModel dataModel) {
- this(dataModel, false);
- }
- /**
- * <p>Creates a weighted {@link PearsonCorrelation}.</p>
- *
- * @param dataModel
- * @param weighted
- */
- public PearsonCorrelation(DataModel dataModel, boolean weighted) {
- if (dataModel == null) {
- throw new IllegalArgumentException("dataModel is null");
- }
+ public TanimotoCoefficientCorrelation(DataModel dataModel) {
this.dataModel = dataModel;
- this.weighted = weighted;
}
/**
- * <p>Several subclasses in this package implement this method to actually compute the correlation
- * from figures computed over users or items. Note that the computations in this class "center" the
- * data, such that X and Y's mean are 0.</p>
- *
- * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
- * the standard correlation computations as a result.</p>
- *
- * @param n total number of users or items
- * @param sumXY sum of product of user/item preference values, over all items/users preferred by
- * both users/items
- * @param sumX2 sum of the square of user/item preference values, over the first item/user
- * @param sumY2 sum of the square of the user/item preference values, over the second item/user
- * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
- * can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s)
+ * @throws UnsupportedOperationException
*/
- private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
- if (n == 0) {
- return Double.NaN;
- }
- // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
- // the data is assumed to be centered.
- double xTerm = Math.sqrt(sumX2);
- double yTerm = Math.sqrt(sumY2);
- double denominator = xTerm * yTerm;
- if (denominator == 0.0) {
- // One or both parties has -all- the same ratings;
- // can't really say much correlation under this measure
- return Double.NaN;
- }
- return sumXY / denominator;
- }
-
- DataModel getDataModel() {
- return dataModel;
- }
-
- PreferenceInferrer getPreferenceInferrer() {
- return inferrer;
- }
-
public void setPreferenceInferrer(PreferenceInferrer inferrer) {
- if (inferrer == null) {
- throw new IllegalArgumentException("inferrer is null");
- }
- this.inferrer = inferrer;
- }
-
- public PreferenceTransform2 getPrefTransform() {
- return prefTransform;
- }
-
- public void setPrefTransform(PreferenceTransform2 prefTransform) {
- this.prefTransform = prefTransform;
+ throw new UnsupportedOperationException();
}
- public CorrelationTransform<?> getCorrelationTransform() {
- return correlationTransform;
- }
-
- public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
- this.correlationTransform = correlationTransform;
- }
-
- boolean isWeighted() {
- return weighted;
- }
-
- public double userCorrelation(User user1, User user2) throws TasteException {
+ public final double userCorrelation(User user1, User user2) throws TasteException {
if (user1 == null || user2 == null) {
throw new IllegalArgumentException("user1 or user2 is null");
@@ -164,9 +65,12 @@
Preference[] xPrefs = user1.getPreferencesAsArray();
Preference[] yPrefs = user2.getPreferencesAsArray();
- if (xPrefs.length == 0 || yPrefs.length == 0) {
+ if (xPrefs.length == 0 && yPrefs.length == 0) {
return Double.NaN;
}
+ if (xPrefs.length == 0 || yPrefs.length == 0) {
+ return 0.0;
+ }
Preference xPref = xPrefs[0];
Preference yPref = yPrefs[0];
@@ -175,58 +79,11 @@
int xPrefIndex = 1;
int yPrefIndex = 1;
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- boolean hasInferrer = inferrer != null;
- boolean hasPrefTransform = prefTransform != null;
-
+ int intersectionSize = 0;
while (true) {
int compare = xIndex.compareTo(yIndex);
- if (hasInferrer || compare == 0) {
- double x;
- double y;
- if (compare == 0) {
- // Both users expressed a preference for the item
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- y = prefTransform.getTransformedValue(yPref);
- } else {
- x = xPref.getValue();
- y = yPref.getValue();
- }
- } else {
- // Only one user expressed a preference, but infer the other one's preference and tally
- // as if the other user expressed that preference
- if (compare < 0) {
- // X has a value; infer Y's
- if (hasPrefTransform) {
- x = prefTransform.getTransformedValue(xPref);
- } else {
- x = xPref.getValue();
- }
- y = inferrer.inferPreference(user2, xIndex);
- } else {
- // compare > 0
- // Y has a value; infer X's
- x = inferrer.inferPreference(user1, yIndex);
- if (hasPrefTransform) {
- y = prefTransform.getTransformedValue(yPref);
- } else {
- y = yPref.getValue();
- }
- }
- }
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
+ if (compare == 0) {
+ intersectionSize++;
}
if (compare <= 0) {
if (xPrefIndex == xPrefs.length) {
@@ -244,31 +101,17 @@
}
}
- // "Center" the data. If my math is correct, this'll do it.
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
+ int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(user1, user2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumItems());
- }
+ double result = (double) intersectionSize / (double) unionSize;
if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
+ log.trace("User correlation between " + user1 + " and " + user2 + " is " + result);
}
return result;
}
- public double itemCorrelation(Item item1, Item item2) throws TasteException {
+ public final double itemCorrelation(Item item1, Item item2) throws TasteException {
if (item1 == null || item2 == null) {
throw new IllegalArgumentException("item1 or item2 is null");
@@ -277,9 +120,12 @@
Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
- if (xPrefs.length == 0 || yPrefs.length == 0) {
+ if (xPrefs.length == 0 && yPrefs.length == 0) {
return Double.NaN;
}
+ if (xPrefs.length == 0 || yPrefs.length == 0) {
+ return 0.0;
+ }
Preference xPref = xPrefs[0];
Preference yPref = yPrefs[0];
@@ -288,27 +134,11 @@
int xPrefIndex = 1;
int yPrefIndex = 1;
- double sumX = 0.0;
- double sumX2 = 0.0;
- double sumY = 0.0;
- double sumY2 = 0.0;
- double sumXY = 0.0;
- int count = 0;
-
- // No, pref inferrers and transforms don't apply here. I think.
-
+ int intersectionSize = 0;
while (true) {
int compare = xIndex.compareTo(yIndex);
if (compare == 0) {
- // Both users expressed a preference for the item
- double x = xPref.getValue();
- double y = yPref.getValue();
- sumXY += x * y;
- sumX += x;
- sumX2 += x * x;
- sumY += y;
- sumY2 += y * y;
- count++;
+ intersectionSize++;
}
if (compare <= 0) {
if (xPrefIndex == xPrefs.length) {
@@ -326,64 +156,23 @@
}
}
- // See comments above on these computations
- double n = (double) count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
+ int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
- if (correlationTransform != null) {
- result = correlationTransform.transformCorrelation(item1, item2, result);
- }
-
- if (!Double.isNaN(result)) {
- result = normalizeWeightResult(result, count, dataModel.getNumUsers());
- }
+ double result = (double) intersectionSize / (double) unionSize;
if (log.isTraceEnabled()) {
- log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
- }
- return result;
- }
-
- private double normalizeWeightResult(double result, int count, int num) {
- if (weighted) {
- double scaleFactor = 1.0 - (double) count / (double) (num + 1);
- if (result < 0.0) {
- result = -1.0 + scaleFactor * (1.0 + result);
- } else {
- result = 1.0 - scaleFactor * (1.0 - result);
- }
- }
- // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
- if (result < -1.0) {
- result = -1.0;
- } else if (result > 1.0) {
- result = 1.0;
+ log.trace("Item correlation between " + item1 + " and " + item2 + " is " + result);
}
return result;
}
public void refresh() {
dataModel.refresh();
- if (inferrer != null) {
- inferrer.refresh();
- }
- if (prefTransform != null) {
- prefTransform.refresh();
- }
- if (correlationTransform != null) {
- correlationTransform.refresh();
- }
}
@Override
- public String toString() {
- return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+ public final String toString() {
+ return "TanimotoCoefficientCorrelation[dataModel:" + dataModel + ']';
}
-}
+}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java?rev=661892&r1=661891&r2=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java Fri May 30 15:06:08 2008
@@ -44,11 +44,13 @@
RandomUtils.useTestSeed();
}
- public static User getUser(String userID, double... values) {
+ public static User getUser(String userID, Double... values) {
List<Preference> prefs = new ArrayList<Preference>(values.length);
int i = 0;
- for (double value : values) {
- prefs.add(new GenericPreference(null, new GenericItem<String>(String.valueOf(i)), value));
+ for (Double value : values) {
+ if (value != null) {
+ prefs.add(new GenericPreference(null, new GenericItem<String>(String.valueOf(i)), value));
+ }
i++;
}
return new GenericUser<String>(userID, prefs);
Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java (from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java Fri May 30 15:06:08 2008
@@ -28,15 +28,15 @@
import java.util.Collections;
/**
- * <p>Tests {@link PearsonCorrelation}.</p>
+ * <p>Tests {@link EuclideanDistanceCorrelation}.</p>
*/
-public final class PearsonCorrelationTest extends CorrelationTestCase {
+public final class EuclideanDistanceCorrelationTest extends CorrelationTestCase {
public void testFullCorrelation1() throws Exception {
User user1 = getUser("test1", 3.0, -2.0);
User user2 = getUser("test2", 3.0, -2.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
assertCorrelationEquals(1.0, correlation);
}
@@ -44,7 +44,7 @@
User user1 = getUser("test1", 3.0, -2.0);
User user2 = getUser("test2", 3.0, -2.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
+ double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
assertCorrelationEquals(1.0, correlation);
}
@@ -52,8 +52,7 @@
User user1 = getUser("test1", 3.0, 3.0);
User user2 = getUser("test2", 3.0, 3.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- // Yeah, undefined in this case
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
assertTrue(Double.isNaN(correlation));
}
@@ -61,16 +60,16 @@
User user1 = getUser("test1", 3.0, -2.0);
User user2 = getUser("test2", -3.0, 2.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.424465381883345, correlation);
}
public void testNoCorrelation1Weighted() throws Exception {
User user1 = getUser("test1", 3.0, -2.0);
User user2 = getUser("test2", -3.0, 2.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
+ double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.8081551272944483, correlation);
}
public void testNoCorrelation2() throws Exception {
@@ -79,7 +78,7 @@
Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
assertTrue(Double.isNaN(correlation));
}
@@ -87,24 +86,24 @@
User user1 = getUser("test1", 90.0, 80.0, 70.0);
User user2 = getUser("test2", 70.0, 80.0, 90.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.3606507916004517, correlation);
}
public void testSimple() throws Exception {
User user1 = getUser("test1", 1.0, 2.0, 3.0);
User user2 = getUser("test2", 2.0, 5.0, 6.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(0.9607689228305227, correlation);
+ double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.5896248568217328, correlation);
}
public void testSimpleWeighted() throws Exception {
User user1 = getUser("test1", 1.0, 2.0, 3.0);
User user2 = getUser("test2", 2.0, 5.0, 6.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
- assertCorrelationEquals(0.9901922307076306, correlation);
+ double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.8974062142054332, correlation);
}
public void testFullItemCorrelation1() throws Exception {
@@ -112,7 +111,7 @@
User user2 = getUser("test2", -2.0, -2.0);
DataModel dataModel = getDataModel(user1, user2);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
assertCorrelationEquals(1.0, correlation);
}
@@ -121,7 +120,7 @@
User user2 = getUser("test2", 3.0, 3.0);
DataModel dataModel = getDataModel(user1, user2);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
// Yeah, undefined in this case
assertTrue(Double.isNaN(correlation));
}
@@ -131,8 +130,8 @@
User user2 = getUser("test2", -2.0, 2.0);
DataModel dataModel = getDataModel(user1, user2);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(-1.0, correlation);
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ assertCorrelationEquals(0.424465381883345, correlation);
}
public void testNoItemCorrelation2() throws Exception {
@@ -142,7 +141,7 @@
GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
DataModel dataModel = getDataModel(user1, user2);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
assertTrue(Double.isNaN(correlation));
}
@@ -152,8 +151,8 @@
User user3 = getUser("test3", 70.0, 90.0);
DataModel dataModel = getDataModel(user1, user2, user3);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(-1.0, correlation);
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ assertCorrelationEquals(0.3606507916004517, correlation);
}
public void testSimpleItem() throws Exception {
@@ -162,8 +161,8 @@
User user3 = getUser("test3", 3.0, 6.0);
DataModel dataModel = getDataModel(user1, user2, user3);
double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(0.9607689228305227, correlation);
+ new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ assertCorrelationEquals(0.5896248568217328, correlation);
}
public void testSimpleItemWeighted() throws Exception {
@@ -171,14 +170,14 @@
User user2 = getUser("test2", 2.0, 5.0);
User user3 = getUser("test3", 3.0, 6.0);
DataModel dataModel = getDataModel(user1, user2, user3);
- ItemCorrelation itemCorrelation = new PearsonCorrelation(dataModel, true);
+ ItemCorrelation itemCorrelation = new EuclideanDistanceCorrelation(dataModel, true);
double correlation = itemCorrelation.itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(0.9901922307076306, correlation);
+ assertCorrelationEquals(0.8974062142054332, correlation);
}
public void testRefresh() {
// Make sure this doesn't throw an exception
- new PearsonCorrelation(getDataModel()).refresh();
+ new EuclideanDistanceCorrelation(getDataModel()).refresh();
}
-}
+}
\ No newline at end of file
Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java (from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java Fri May 30 15:06:08 2008
@@ -17,168 +17,73 @@
package org.apache.mahout.cf.taste.impl.correlation;
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.impl.model.GenericItem;
-import org.apache.mahout.cf.taste.impl.model.GenericPreference;
-import org.apache.mahout.cf.taste.impl.model.GenericUser;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.User;
-import java.util.Collections;
-
/**
- * <p>Tests {@link PearsonCorrelation}.</p>
+ * <p>Tests {@link TanimotoCoefficientCorrelation}.</p>
*/
-public final class PearsonCorrelationTest extends CorrelationTestCase {
-
- public void testFullCorrelation1() throws Exception {
- User user1 = getUser("test1", 3.0, -2.0);
- User user2 = getUser("test2", 3.0, -2.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(1.0, correlation);
- }
-
- public void testFullCorrelation1Weighted() throws Exception {
- User user1 = getUser("test1", 3.0, -2.0);
- User user2 = getUser("test2", 3.0, -2.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
- assertCorrelationEquals(1.0, correlation);
- }
-
- public void testFullCorrelation2() throws Exception {
- User user1 = getUser("test1", 3.0, 3.0);
- User user2 = getUser("test2", 3.0, 3.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- // Yeah, undefined in this case
- assertTrue(Double.isNaN(correlation));
- }
+public final class TanimotoCoefficientCorrelationTest extends CorrelationTestCase {
public void testNoCorrelation1() throws Exception {
- User user1 = getUser("test1", 3.0, -2.0);
- User user2 = getUser("test2", -3.0, 2.0);
+ User user1 = getUser("test1");
+ User user2 = getUser("test2");
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
- }
-
- public void testNoCorrelation1Weighted() throws Exception {
- User user1 = getUser("test1", 3.0, -2.0);
- User user2 = getUser("test2", -3.0, 2.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertTrue(Double.isNaN(correlation));
}
public void testNoCorrelation2() throws Exception {
- Preference pref1 = new GenericPreference(null, new GenericItem<String>("1"), 1.0);
- GenericUser<String> user1 = new GenericUser<String>("test1", Collections.singletonList(pref1));
- Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
- GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
+ User user1 = getUser("test1");
+ User user2 = getUser("test2", 1.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertTrue(Double.isNaN(correlation));
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.0, correlation);
}
- public void testNoCorrelation3() throws Exception {
- User user1 = getUser("test1", 90.0, 80.0, 70.0);
- User user2 = getUser("test2", 70.0, 80.0, 90.0);
+ public void testNoCorrelation() throws Exception {
+ User user1 = getUser("test1", null, 2.0, 3.0);
+ User user2 = getUser("test2", 1.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(-1.0, correlation);
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.0, correlation);
}
- public void testSimple() throws Exception {
- User user1 = getUser("test1", 1.0, 2.0, 3.0);
- User user2 = getUser("test2", 2.0, 5.0, 6.0);
+ public void testFullCorrelation1() throws Exception {
+ User user1 = getUser("test1", 1.0);
+ User user2 = getUser("test2", 1.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(0.9607689228305227, correlation);
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(1.0, correlation);
}
- public void testSimpleWeighted() throws Exception {
+ public void testFullCorrelation2() throws Exception {
User user1 = getUser("test1", 1.0, 2.0, 3.0);
- User user2 = getUser("test2", 2.0, 5.0, 6.0);
+ User user2 = getUser("test2", 1.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
- assertCorrelationEquals(0.9901922307076306, correlation);
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.3333333333333333, correlation);
}
- public void testFullItemCorrelation1() throws Exception {
- User user1 = getUser("test1", 3.0, 3.0);
- User user2 = getUser("test2", -2.0, -2.0);
+ public void testCorrelation1() throws Exception {
+ User user1 = getUser("test1", null, 2.0, 3.0);
+ User user2 = getUser("test2", 1.0, 1.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(1.0, correlation);
- }
-
- public void testFullItemCorrelation2() throws Exception {
- User user1 = getUser("test1", 3.0, 3.0);
- User user2 = getUser("test2", 3.0, 3.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- // Yeah, undefined in this case
- assertTrue(Double.isNaN(correlation));
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.3333333333333333, correlation); // shared helper, as in testFullCorrelation2
}
- public void testNoItemCorrelation1() throws Exception {
- User user1 = getUser("test1", 3.0, -3.0);
- User user2 = getUser("test2", -2.0, 2.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(-1.0, correlation);
- }
-
- public void testNoItemCorrelation2() throws Exception {
- Preference pref1 = new GenericPreference(null, new GenericItem<String>("1"), 1.0);
- GenericUser<String> user1 = new GenericUser<String>("test1", Collections.singletonList(pref1));
- Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
- GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
+ public void testCorrelation2() throws Exception {
+ User user1 = getUser("test1", null, 2.0, 3.0, 1.0);
+ User user2 = getUser("test2", 1.0, 1.0, null, 0.0);
DataModel dataModel = getDataModel(user1, user2);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
- assertTrue(Double.isNaN(correlation));
- }
-
- public void testNoItemCorrelation3() throws Exception {
- User user1 = getUser("test1", 90.0, 70.0);
- User user2 = getUser("test2", 80.0, 80.0);
- User user3 = getUser("test3", 70.0, 90.0);
- DataModel dataModel = getDataModel(user1, user2, user3);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(-1.0, correlation);
- }
-
- public void testSimpleItem() throws Exception {
- User user1 = getUser("test1", 1.0, 2.0);
- User user2 = getUser("test2", 2.0, 5.0);
- User user3 = getUser("test3", 3.0, 6.0);
- DataModel dataModel = getDataModel(user1, user2, user3);
- double correlation =
- new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(0.9607689228305227, correlation);
- }
-
- public void testSimpleItemWeighted() throws Exception {
- User user1 = getUser("test1", 1.0, 2.0);
- User user2 = getUser("test2", 2.0, 5.0);
- User user3 = getUser("test3", 3.0, 6.0);
- DataModel dataModel = getDataModel(user1, user2, user3);
- ItemCorrelation itemCorrelation = new PearsonCorrelation(dataModel, true);
- double correlation = itemCorrelation.itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
- assertCorrelationEquals(0.9901922307076306, correlation);
+ double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+ assertCorrelationEquals(0.5, correlation); // shared helper, as in testFullCorrelation1
}
public void testRefresh() {
// Make sure this doesn't throw an exception
- new PearsonCorrelation(getDataModel()).refresh();
+ new TanimotoCoefficientCorrelation(getDataModel()).refresh();
}
-}
+}
\ No newline at end of file