You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by is...@apache.org on 2010/01/14 11:58:40 UTC

svn commit: r899157 - in /lucene/mahout/trunk/math/src: main/java/org/apache/mahout/math/stats/LogLikelihood.java test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java

Author: isabel
Date: Thu Jan 14 10:58:40 2010
New Revision: 899157

URL: http://svn.apache.org/viewvc?rev=899157&view=rev
Log:
MAHOUT-244 added root log-likelihood method.

Modified:
    lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
    lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java

Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java Thu Jan 14 10:58:40 2010
@@ -43,7 +43,7 @@
   }
 
   /**
-   * Calculate the Log-likelihood ratio for two events, call them A and B.  Then we have:
+   * Calculate the Raw Log-likelihood ratio for two events, call them A and B.  Then we have:
    * <p/>
    * <table border="1" cellpadding="5" cellspacing="0">
    * <tbody><tr><td>&nbsp;</td><td>Event A</td><td>Everything but A</td></tr>
@@ -55,7 +55,7 @@
    * @param k12 The number of times the second event occurred WITHOUT the first event
    * @param k21 The number of times the first event occurred WITHOUT the second event
    * @param k22 The number of times something else occurred (i.e. was neither of these events
-   * @return The log-likelihood ratio
+   * @return The raw log-likelihood ratio
    *
    * <p/>
    * Credit to http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html for the table and the descriptions.
@@ -66,4 +66,24 @@
     double matrixEntropy = entropy(k11, k12, k21, k22);
     return 2 * (matrixEntropy - rowEntropy - columnEntropy);
   }
+  
+  /** 
+   * Calculate the Root Log-likelihood ratio for two events.
+   * 
+   * @see #logLikelihoodRatio(int, int, int, int);
+
+   * @param k11 The number of times the two events occurred together
+   * @param k12 The number of times the second event occurred WITHOUT the first event
+   * @param k21 The number of times the first event occurred WITHOUT the second event
+   * @param k22 The number of times something else occurred (i.e. was neither of these events
+   * @return The root log-likelihood ratio
+   * 
+   * <p/>
+   * See discussion of raw vs. root LLR at 
+   * http://www.lucidimagination.com/search/document/6dc8709e65a7ced1/llr_scoring_question
+   */
+  public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int k22) {
+    double llr = logLikelihoodRatio(k11, k12, k21, k22);
+    return Math.signum(((double) k11 / (k11+k12)) - ((double) k21 / (k21+k22))) * Math.sqrt(llr);
+  }
 }

Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Thu Jan 14 10:58:40 2010
@@ -65,4 +65,12 @@
     assertEquals(LogLikelihood.logLikelihoodRatio(1000,1000,1000, 99000), 5714.932, 0.001);
   }
 
+  @Test
+  public void testRootLogLikelihood() throws Exception {
+    // positive where k11 is bigger than expected.
+    assertTrue(LogLikelihood.rootLogLikelihoodRatio(904, 21060, 1144, 283012) > 0.0);
+    
+    // negative because k11 is lower than expected
+    assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) < 0.0);
+  }
 }