You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/30 00:35:52 UTC

svn commit: r939501 - in /lucene/mahout/trunk/math/src: main/java/org/apache/mahout/math/stats/LogLikelihood.java test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java

Author: srowen
Date: Thu Apr 29 22:35:52 2010
New Revision: 939501

URL: http://svn.apache.org/viewvc?rev=939501&view=rev
Log:
Per Shashikant, make sure LLR is not negative. Streamline implementation and add a few tests.

Modified:
    lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
    lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java

Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=939501&r1=939500&r2=939501&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java Thu Apr 29 22:35:52 2010
@@ -20,29 +20,28 @@ package org.apache.mahout.math.stats;
 /**
  * Utility methods for working with log-likelihood
  */
-public class LogLikelihood {
+public final class LogLikelihood {
 
   private LogLikelihood() {
   }
 
   /**
    * Calculate the Shannon entropy.
-   * @param elements TODO FILL IN HERE
    * @return The entropy value for the elements
    */
   public static double entropy(int... elements) {
-    double sum = 0;
-    for (int element : elements) {
-      sum += element;
-    }
+    double sum = 0.0;
     double result = 0.0;
-    for (int x : elements) {
-      if (x < 0) {
-        throw new IllegalArgumentException("Should not have negative count for entropy computation: (" + x + ')');
+    for (int element : elements) {
+      if (element < 0) {
+        throw new IllegalArgumentException("Should not have negative count for entropy computation: (" + element + ')');
+      }
+      if (element > 0) {
+        result += element * Math.log(element);
+        sum += element;
       }
-      int zeroFlag = (x == 0 ? 1 : 0);
-      result += x * Math.log((x + zeroFlag) / sum);
     }
+    result -= sum * Math.log(sum);
     return -result;
   }
 
@@ -68,7 +67,11 @@ public class LogLikelihood {
     double rowEntropy = entropy(k11, k12) + entropy(k21, k22);
     double columnEntropy = entropy(k11, k21) + entropy(k12, k22);
     double matrixEntropy = entropy(k11, k12, k21, k22);
-    return 2 * (matrixEntropy - rowEntropy - columnEntropy);
+    if (rowEntropy + columnEntropy > matrixEntropy) {
+      // round off error
+      return 0.0;
+    }
+    return 2.0 * (matrixEntropy - rowEntropy - columnEntropy);
   }
   
   /** 
@@ -88,6 +91,10 @@ public class LogLikelihood {
    */
   public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int k22) {
     double llr = logLikelihoodRatio(k11, k12, k21, k22);
-    return Math.signum(((double) k11 / (k11+k12)) - ((double) k21 / (k21+k22))) * Math.sqrt(llr);
+    double sqrt = Math.sqrt(llr);
+    if (((double) k11 / (k11+k12)) < ((double) k21 / (k21+k22))) {
+      sqrt = -sqrt;
+    }
+    return sqrt;
   }
 }

Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=939501&r1=939500&r2=939501&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Thu Apr 29 22:35:52 2010
@@ -25,6 +25,7 @@ public class LogLikelihoodTest extends A
   public void testEntropy() throws Exception {
 
     assertEquals(1.386294, LogLikelihood.entropy(1, 1), 0.0001);
+    assertEquals(0.0, LogLikelihood.entropy(1), 0.0);
     //TODO: more tests here
     try {
       LogLikelihood.entropy(-1, -1);//exception
@@ -37,9 +38,9 @@ public class LogLikelihoodTest extends A
   @Test
   public void testLogLikelihood() throws Exception {
     //TODO: check the epsilons
-    assertEquals(2.772589, LogLikelihood.logLikelihoodRatio(1, 0, 0, 1), 0.0001);
-    assertEquals(27.72589, LogLikelihood.logLikelihoodRatio(10, 0, 0, 10), 0.0001);
-    assertEquals(39.33052, LogLikelihood.logLikelihoodRatio(5, 1995, 0, 100000), 0.0001);
+    assertEquals(2.772589, LogLikelihood.logLikelihoodRatio(1, 0, 0, 1), 0.000001);
+    assertEquals(27.72589, LogLikelihood.logLikelihoodRatio(10, 0, 0, 10), 0.00001);
+    assertEquals(39.33052, LogLikelihood.logLikelihoodRatio(5, 1995, 0, 100000), 0.00001);
     assertEquals(4730.737, LogLikelihood.logLikelihoodRatio(1000, 1995, 1000, 100000), 0.001);
     assertEquals(5734.343, LogLikelihood.logLikelihoodRatio(1000, 1000, 1000, 100000), 0.001);
     assertEquals(5714.932, LogLikelihood.logLikelihoodRatio(1000, 1000, 1000, 99000), 0.001);
@@ -53,4 +54,9 @@ public class LogLikelihoodTest extends A
     // negative because k11 is lower than expected
     assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) < 0.0);
   }
+
+  @Test
+  public void testRootNegativeLLR() {
+    assertEquals(0.0, LogLikelihood.rootLogLikelihoodRatio(6, 7567, 1924, 2426487), 0.00000001);
+  }
 }