You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by is...@apache.org on 2010/01/14 11:58:40 UTC
svn commit: r899157 - in /lucene/mahout/trunk/math/src:
main/java/org/apache/mahout/math/stats/LogLikelihood.java
test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
Author: isabel
Date: Thu Jan 14 10:58:40 2010
New Revision: 899157
URL: http://svn.apache.org/viewvc?rev=899157&view=rev
Log:
MAHOUT-244 added root log-likelihood method.
Modified:
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java Thu Jan 14 10:58:40 2010
@@ -43,7 +43,7 @@
}
/**
- * Calculate the Log-likelihood ratio for two events, call them A and B. Then we have:
+ * Calculate the Raw Log-likelihood ratio for two events, call them A and B. Then we have:
* <p/>
* <table border="1" cellpadding="5" cellspacing="0">
* <tbody><tr><td> </td><td>Event A</td><td>Everything but A</td></tr>
@@ -55,7 +55,7 @@
* @param k12 The number of times the second event occurred WITHOUT the first event
* @param k21 The number of times the first event occurred WITHOUT the second event
* @param k22 The number of times something else occurred (i.e. was neither of these events
- * @return The log-likelihood ratio
+ * @return The raw log-likelihood ratio
*
* <p/>
* Credit to http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html for the table and the descriptions.
@@ -66,4 +66,24 @@
double matrixEntropy = entropy(k11, k12, k21, k22);
return 2 * (matrixEntropy - rowEntropy - columnEntropy);
}
+
+ /**
+ * Calculate the Root Log-likelihood ratio for two events: the square root of the raw ratio, signed
+ * positive when k11 is above its expected count and negative when it is below. The sign is taken from
+ * k11*k22 - k12*k21 in long arithmetic, so an empty row or a large row total cannot corrupt it.
+ *
+ * @param k11 The number of times the two events occurred together
+ * @param k12 The number of times the second event occurred WITHOUT the first event
+ * @param k21 The number of times the first event occurred WITHOUT the second event
+ * @param k22 The number of times something else occurred (i.e. was neither of these events)
+ * @return The root log-likelihood ratio
+ * @see #logLikelihoodRatio(int, int, int, int)
+ * @see <a href="http://www.lucidimagination.com/search/document/6dc8709e65a7ced1/llr_scoring_question">raw vs. root LLR</a>
+ */
+ public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int k22) {
+ // Clamp at zero: rounding may leave a near-zero raw LLR slightly negative, and sqrt of that is NaN.
+ double llr = Math.max(0.0, logLikelihoodRatio(k11, k12, k21, k22));
+ // Same sign as k11/(k11+k12) - k21/(k21+k22), but with no division by a possibly-zero row total.
+ return Long.signum((long) k11 * k22 - (long) k12 * k21) * Math.sqrt(llr);
+ }
}
Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Thu Jan 14 10:58:40 2010
@@ -65,4 +65,12 @@
assertEquals(LogLikelihood.logLikelihoodRatio(1000,1000,1000, 99000), 5714.932, 0.001);
}
+ @Test
+ public void testRootLogLikelihood() throws Exception {
+ // positive where k11 is bigger than expected.
+ assertTrue(LogLikelihood.rootLogLikelihoodRatio(904, 21060, 1144, 283012) > 0.0);
+
+ // negative because k11 is lower than expected
+ assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) < 0.0);
+ }
}