You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/06/04 00:59:28 UTC

svn commit: r662912 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/impl/correlation/ test/java/org/apache/mahout/cf/taste/impl/correlation/

Author: srowen
Date: Tue Jun  3 15:59:28 2008
New Revision: 662912

URL: http://svn.apache.org/viewvc?rev=662912&view=rev
Log:
First version of LogLikelihoodCorrelation

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java
      - copied, changed from r661892, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java
      - copied, changed from r661892, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java
Modified:
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java (from r661892, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java&r1=661892&r2=662912&rev=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelation.java Tue Jun  3 15:59:28 2008
@@ -18,152 +18,45 @@
 package org.apache.mahout.cf.taste.impl.correlation;
 
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
- * <p>An implementation of a "correlation" based on the
- * <a href="http://en.wikipedia.org/wiki/Jaccard_index">Tanimoto coefficient</a>, or extended
- * Jaccard coefficient.</p>
- *
- * <p>This is intended for "binary" data sets where a user either expersses a generic "yes" preference
- * for an item or has no preference. The actual preference values do not matter here, only their presence
- * or absence.</p>
- *
- * <p>The value returned is in [0,1].</p>
+ * See <a href="http://citeseer.ist.psu.edu/29096.html">http://citeseer.ist.psu.edu/29096.html</a>.
  */
-public final class TanimotoCoefficientCorrelation implements UserCorrelation, ItemCorrelation {
-
-  private static final Logger log = LoggerFactory.getLogger(TanimotoCoefficientCorrelation.class);
+public final class LogLikelihoodCorrelation implements ItemCorrelation {
 
   private final DataModel dataModel;
 
-  public TanimotoCoefficientCorrelation(DataModel dataModel) {
+  public LogLikelihoodCorrelation(DataModel dataModel) {
     this.dataModel = dataModel;
   }
 
-  /**
-   * @throws UnsupportedOperationException
-   */
-  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
-    throw new UnsupportedOperationException();
-  }
-
-  public final double userCorrelation(User user1, User user2) throws TasteException {
-
-    if (user1 == null || user2 == null) {
-      throw new IllegalArgumentException("user1 or user2 is null");
-    }
-
-    Preference[] xPrefs = user1.getPreferencesAsArray();
-    Preference[] yPrefs = user2.getPreferencesAsArray();
-
-    if (xPrefs.length == 0 && yPrefs.length == 0) {
-      return Double.NaN;
-    }
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return 0.0;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    Item xIndex = xPref.getItem();
-    Item yIndex = yPref.getItem();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    int intersectionSize = 0;
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (compare == 0) {
-        intersectionSize++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getItem();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getItem();
-      }
-    }
-
-    int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
-
-    double result = (double) intersectionSize / (double) unionSize;
-
-    if (log.isTraceEnabled()) {
-      log.trace("User correlation between " + user1 + " and " + user2 + " is " + result);
-    }
-    return result;
-  }
-
   public final double itemCorrelation(Item item1, Item item2) throws TasteException {
-
     if (item1 == null || item2 == null) {
       throw new IllegalArgumentException("item1 or item2 is null");
     }
+    int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(item1.getID(), item2.getID());
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(item1.getID());
+    int preferring2 = dataModel.getNumUsersWithPreferenceFor(item2.getID());
+    int numUsers = dataModel.getNumUsers();
+    double logLikelihood =
+      twoLogLambda(preferring1and2, preferring1 - preferring1and2, preferring2, numUsers - preferring2);
+    return 1.0 - 1.0 / (1.0 + logLikelihood);
+  }
 
-    Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
-    Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
-    if (xPrefs.length == 0 && yPrefs.length == 0) {
-      return Double.NaN;
-    }
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return 0.0;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    User xIndex = xPref.getUser();
-    User yIndex = yPref.getUser();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    int intersectionSize = 0;
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (compare == 0) {
-        intersectionSize++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getUser();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getUser();
-      }
-    }
-
-    int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
+  private static double twoLogLambda(double k1, double k2, double n1, double n2) {
+    double p = (k1 + k2) / (n1 + n2);
+    return 2.0 * (logL(k1 / n1, k1, n1) + logL(k2 / n2, k2, n2) - logL(p, k1, n1) - logL(p, k2, n2));
+  }
 
-    double result = (double) intersectionSize / (double) unionSize;
+  private static double logL(double p, double k, double n) {
+    return k * safeLog(p) + (n - k) * safeLog(1.0 - p);
+  }
 
-    if (log.isTraceEnabled()) {
-      log.trace("Item correlation between " + item1 + " and " + item2 + " is " + result);
-    }
-    return result;
+  private static double safeLog(double d) {
+    return d <= 0.0 ? 0 : Math.log(d);
   }
 
   public void refresh() {
@@ -172,7 +65,7 @@
 
   @Override
   public final String toString() {
-    return "TanimotoCoefficientCorrelation[dataModel:" + dataModel + ']';
+    return "LogLikelihoodCorrelation[dataModel:" + dataModel + ']';
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java?rev=662912&r1=662911&r2=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/CorrelationTestCase.java Tue Jun  3 15:59:28 2008
@@ -22,6 +22,7 @@
 abstract class CorrelationTestCase extends TasteTestCase {
 
   static void assertCorrelationEquals(double expected, double actual) {
+    assertTrue("Correlation is NaN", !Double.isNaN(actual));
     assertTrue("Correlation > 1.0", actual <= 1.0);
     assertTrue("Correlation < -1.0", actual >= -1.0);
     assertEquals(expected, actual, EPSILON);

Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java (from r661892, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java&r1=661892&r2=662912&rev=662912&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/LogLikelihoodCorrelationTest.java Tue Jun  3 15:59:28 2008
@@ -21,69 +21,46 @@
 import org.apache.mahout.cf.taste.model.User;
 
 /**
- * <p>Tests {@link TanimotoCoefficientCorrelation}.</p>
+ * <p>Tests {@link LogLikelihoodCorrelation}.</p>
  */
-public final class TanimotoCoefficientCorrelationTest extends CorrelationTestCase {
+public final class LogLikelihoodCorrelationTest extends CorrelationTestCase {
 
-  public void testNoCorrelation1() throws Exception {
-    User user1 = getUser("test1");
-    User user2 = getUser("test2");
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertTrue(Double.isNaN(correlation));
-  }
-
-  public void testNoCorrelation2() throws Exception {
-    User user1 = getUser("test1");
-    User user2 = getUser("test2", 1.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.0, correlation);
-  }
-
-  public void testNoCorrelation() throws Exception {
-    User user1 = getUser("test1", null, 2.0, 3.0);
-    User user2 = getUser("test2", 1.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.0, correlation);
-  }
-
-  public void testFullCorrelation1() throws Exception {
-    User user1 = getUser("test1", 1.0);
-    User user2 = getUser("test2", 1.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(1.0, correlation);
-  }
-
-  public void testFullCorrelation2() throws Exception {
-    User user1 = getUser("test1", 1.0, 2.0, 3.0);
-    User user2 = getUser("test2", 1.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.3333333333333333, correlation);
-  }
-
-  public void testCorrelation1() throws Exception {
-    User user1 = getUser("test1", null, 2.0, 3.0);
-    User user2 = getUser("test2", 1.0, 1.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertEquals(0.3333333333333333, correlation);
-  }
-
-  public void testCorrelation2() throws Exception {
-    User user1 = getUser("test1", null, 2.0, 3.0, 1.0);
-    User user2 = getUser("test2", 1.0, 1.0, null, 0.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
-    assertEquals(0.5, correlation);
+  public void testCorrelation() throws Exception {
+    User user1 = getUser("test1", 1.0,  1.0);
+    User user2 = getUser("test2", 1.0,  null, 1.0);
+    User user3 = getUser("test3", null, null, 1.0,  1.0,  1.0);
+    User user4 = getUser("test4", 1.0,  1.0,  1.0,  1.0,  1.0);
+    User user5 = getUser("test5", null, 1.0,  1.0,  1.0,  1.0);
+    DataModel dataModel = getDataModel(user1, user2, user3, user4, user5);
+
+    double correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("1"), dataModel.getItem("0"));
+    assertCorrelationEquals(0.12160727029227925, correlation);
+
+    correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+    assertCorrelationEquals(0.12160727029227925, correlation);
+
+    correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("2"), dataModel.getItem("1"));
+    assertCorrelationEquals(0.5423213660693733, correlation);
+
+    correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("2"), dataModel.getItem("3"));
+    assertCorrelationEquals(0.6905400104897509, correlation);
+
+    correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("3"), dataModel.getItem("4"));
+    assertCorrelationEquals(0.8706358464330881, correlation);
+
+    correlation = new LogLikelihoodCorrelation(dataModel).
+        itemCorrelation(dataModel.getItem("4"), dataModel.getItem("3"));
+    assertCorrelationEquals(0.8706358464330881, correlation);
   }
 
   public void testRefresh() {
     // Make sure this doesn't throw an exception
-    new TanimotoCoefficientCorrelation(getDataModel()).refresh();
+    new LogLikelihoodCorrelation(getDataModel()).refresh();
   }
 
 }
\ No newline at end of file