You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/06/04 00:59:28 UTC
svn commit: r662912 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/impl/correlation/
test/java/org/apache/mahout/cf/taste/impl/correlation/
Author: srowen
Date: Tue Jun 3 15:59:28 2008
New Revision: 662912
URL: http://svn.apache.org/viewvc?rev=662912&view=rev
Log:
First version of LogLikelihoodCorrelation
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java
- copied, changed from r661892, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java
- copied, changed from r661892, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java
Modified:
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java (from r661892, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java&r1=661892&r2=662912&rev=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java Tue Jun 3 15:59:28 2008
@@ -18,152 +18,45 @@
package org.apache.mahout.cf.taste.impl.correlation;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
- * <p>An implementation of a "correlation" based on the
- * <a href="http://en.wikipedia.org/wiki/Jaccard_index">Tanimoto coefficient</a>, or extended
- * Jaccard coefficient.</p>
- *
- * <p>This is intended for "binary" data sets where a user either expersses a generic "yes" preference
- * for an item or has no preference. The actual preference values do not matter here, only their presence
- * or absence.</p>
- *
- * <p>The value returned is in [0,1].</p>
+ * See <a href="http://citeseer.ist.psu.edu/29096.html">http://citeseer.ist.psu.edu/29096.html</a>.
*/
-public final class TanimotoCoefficientCorrelation implements UserCorrelation, ItemCorrelation {
-
- private static final Logger log = LoggerFactory.getLogger(TanimotoCoefficientCorrelation.class);
+public final class LogLikelihoodCorrelation implements ItemCorrelation {
private final DataModel dataModel;
- public TanimotoCoefficientCorrelation(DataModel dataModel) {
+ public LogLikelihoodCorrelation(DataModel dataModel) {
this.dataModel = dataModel;
}
- /**
- * @throws UnsupportedOperationException
- */
- public void setPreferenceInferrer(PreferenceInferrer inferrer) {
- throw new UnsupportedOperationException();
- }
-
- public final double userCorrelation(User user1, User user2) throws TasteException {
-
- if (user1 == null || user2 == null) {
- throw new IllegalArgumentException("user1 or user2 is null");
- }
-
- Preference[] xPrefs = user1.getPreferencesAsArray();
- Preference[] yPrefs = user2.getPreferencesAsArray();
-
- if (xPrefs.length == 0 && yPrefs.length == 0) {
- return Double.NaN;
- }
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return 0.0;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- Item xIndex = xPref.getItem();
- Item yIndex = yPref.getItem();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- int intersectionSize = 0;
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (compare == 0) {
- intersectionSize++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getItem();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getItem();
- }
- }
-
- int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
-
- double result = (double) intersectionSize / (double) unionSize;
-
- if (log.isTraceEnabled()) {
- log.trace("User correlation between " + user1 + " and " + user2 + " is " + result);
- }
- return result;
- }
-
public final double itemCorrelation(Item item1, Item item2) throws TasteException {
-
if (item1 == null || item2 == null) {
throw new IllegalArgumentException("item1 or item2 is null");
}
+ int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(item1.getID(), item2.getID());
+ int preferring1 = dataModel.getNumUsersWithPreferenceFor(item1.getID());
+ int preferring2 = dataModel.getNumUsersWithPreferenceFor(item2.getID());
+ int numUsers = dataModel.getNumUsers();
+ double logLikelihood =
+ twoLogLambda(preferring1and2, preferring1 - preferring1and2, preferring2, numUsers - preferring2);
+ return 1.0 - 1.0 / (1.0 + logLikelihood);
+ }
- Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
- Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
- if (xPrefs.length == 0 && yPrefs.length == 0) {
- return Double.NaN;
- }
- if (xPrefs.length == 0 || yPrefs.length == 0) {
- return 0.0;
- }
-
- Preference xPref = xPrefs[0];
- Preference yPref = yPrefs[0];
- User xIndex = xPref.getUser();
- User yIndex = yPref.getUser();
- int xPrefIndex = 1;
- int yPrefIndex = 1;
-
- int intersectionSize = 0;
- while (true) {
- int compare = xIndex.compareTo(yIndex);
- if (compare == 0) {
- intersectionSize++;
- }
- if (compare <= 0) {
- if (xPrefIndex == xPrefs.length) {
- break;
- }
- xPref = xPrefs[xPrefIndex++];
- xIndex = xPref.getUser();
- }
- if (compare >= 0) {
- if (yPrefIndex == yPrefs.length) {
- break;
- }
- yPref = yPrefs[yPrefIndex++];
- yIndex = yPref.getUser();
- }
- }
-
- int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
+ private static double twoLogLambda(double k1, double k2, double n1, double n2) {
+ double p = (k1 + k2) / (n1 + n2);
+ return 2.0 * (logL(k1 / n1, k1, n1) + logL(k2 / n2, k2, n2) - logL(p, k1, n1) - logL(p, k2, n2));
+ }
- double result = (double) intersectionSize / (double) unionSize;
+ private static double logL(double p, double k, double n) {
+ return k * safeLog(p) + (n - k) * safeLog(1.0 - p);
+ }
- if (log.isTraceEnabled()) {
- log.trace("Item correlation between " + item1 + " and " + item2 + " is " + result);
- }
- return result;
+ private static double safeLog(double d) {
+ return d <= 0.0 ? 0 : Math.log(d);
}
public void refresh() {
@@ -172,7 +65,7 @@
@Override
public final String toString() {
- return "TanimotoCoefficientCorrelation[dataModel:" + dataModel + ']';
+ return "LogLikelihoodCorrelation[dataModel:" + dataModel + ']';
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java?rev=662912&r1=662911&r2=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java Tue Jun 3 15:59:28 2008
@@ -22,6 +22,7 @@
abstract class CorrelationTestCase extends TasteTestCase {
static void assertCorrelationEquals(double expected, double actual) {
+ assertTrue("Correlation is NaN", !Double.isNaN(actual));
assertTrue("Correlation > 1.0", actual <= 1.0);
assertTrue("Correlation < -1.0", actual >= -1.0);
assertEquals(expected, actual, EPSILON);
Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java (from r661892, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java&r1=661892&r2=662912&rev=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java Tue Jun 3 15:59:28 2008
@@ -21,69 +21,46 @@
import org.apache.mahout.cf.taste.model.User;
/**
- * <p>Tests {@link TanimotoCoefficientCorrelation}.</p>
+ * <p>Tests {@link LogLikelihoodCorrelation}.</p>
*/
-public final class TanimotoCoefficientCorrelationTest extends CorrelationTestCase {
+public final class LogLikelihoodCorrelationTest extends CorrelationTestCase {
- public void testNoCorrelation1() throws Exception {
- User user1 = getUser("test1");
- User user2 = getUser("test2");
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertTrue(Double.isNaN(correlation));
- }
-
- public void testNoCorrelation2() throws Exception {
- User user1 = getUser("test1");
- User user2 = getUser("test2", 1.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(0.0, correlation);
- }
-
- public void testNoCorrelation() throws Exception {
- User user1 = getUser("test1", null, 2.0, 3.0);
- User user2 = getUser("test2", 1.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(0.0, correlation);
- }
-
- public void testFullCorrelation1() throws Exception {
- User user1 = getUser("test1", 1.0);
- User user2 = getUser("test2", 1.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(1.0, correlation);
- }
-
- public void testFullCorrelation2() throws Exception {
- User user1 = getUser("test1", 1.0, 2.0, 3.0);
- User user2 = getUser("test2", 1.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertCorrelationEquals(0.3333333333333333, correlation);
- }
-
- public void testCorrelation1() throws Exception {
- User user1 = getUser("test1", null, 2.0, 3.0);
- User user2 = getUser("test2", 1.0, 1.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertEquals(0.3333333333333333, correlation);
- }
-
- public void testCorrelation2() throws Exception {
- User user1 = getUser("test1", null, 2.0, 3.0, 1.0);
- User user2 = getUser("test2", 1.0, 1.0, null, 0.0);
- DataModel dataModel = getDataModel(user1, user2);
- double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
- assertEquals(0.5, correlation);
+ public void testCorrelation() throws Exception {
+ User user1 = getUser("test1", 1.0, 1.0);
+ User user2 = getUser("test2", 1.0, null, 1.0);
+ User user3 = getUser("test3", null, null, 1.0, 1.0, 1.0);
+ User user4 = getUser("test4", 1.0, 1.0, 1.0, 1.0, 1.0);
+ User user5 = getUser("test5", null, 1.0, 1.0, 1.0, 1.0);
+ DataModel dataModel = getDataModel(user1, user2, user3, user4, user5);
+
+ double correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("1"), dataModel.getItem("0"));
+ assertCorrelationEquals(0.12160727029227925, correlation);
+
+ correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+ assertCorrelationEquals(0.12160727029227925, correlation);
+
+ correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("2"), dataModel.getItem("1"));
+ assertCorrelationEquals(0.5423213660693733, correlation);
+
+ correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("2"), dataModel.getItem("3"));
+ assertCorrelationEquals(0.6905400104897509, correlation);
+
+ correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("3"), dataModel.getItem("4"));
+ assertCorrelationEquals(0.8706358464330881, correlation);
+
+ correlation = new LogLikelihoodCorrelation(dataModel).
+ itemCorrelation(dataModel.getItem("4"), dataModel.getItem("3"));
+ assertCorrelationEquals(0.8706358464330881, correlation);
}
public void testRefresh() {
// Make sure this doesn't throw an exception
- new TanimotoCoefficientCorrelation(getDataModel()).refresh();
+ new LogLikelihoodCorrelation(getDataModel()).refresh();
}
}
\ No newline at end of file