You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/30 00:35:52 UTC
svn commit: r939501 - in /lucene/mahout/trunk/math/src:
main/java/org/apache/mahout/math/stats/LogLikelihood.java
test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
Author: srowen
Date: Thu Apr 29 22:35:52 2010
New Revision: 939501
URL: http://svn.apache.org/viewvc?rev=939501&view=rev
Log:
Per Shashikant, make sure LLR is not negative. Streamline implementation and add a few tests.
Modified:
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=939501&r1=939500&r2=939501&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java Thu Apr 29 22:35:52 2010
@@ -20,29 +20,28 @@ package org.apache.mahout.math.stats;
/**
* Utility methods for working with log-likelihood
*/
-public class LogLikelihood {
+public final class LogLikelihood {
private LogLikelihood() {
}
/**
* Calculate the Shannon entropy.
- * @param elements TODO FILL IN HERE
* @return The entropy value for the elements
*/
public static double entropy(int... elements) {
- double sum = 0;
- for (int element : elements) {
- sum += element;
- }
+ double sum = 0.0;
double result = 0.0;
- for (int x : elements) {
- if (x < 0) {
- throw new IllegalArgumentException("Should not have negative count for entropy computation: (" + x + ')');
+ for (int element : elements) {
+ if (element < 0) {
+ throw new IllegalArgumentException("Should not have negative count for entropy computation: (" + element + ')');
+ }
+ if (element > 0) {
+ result += element * Math.log(element);
+ sum += element;
}
- int zeroFlag = (x == 0 ? 1 : 0);
- result += x * Math.log((x + zeroFlag) / sum);
}
+ result -= sum * Math.log(sum);
return -result;
}
@@ -68,7 +67,11 @@ public class LogLikelihood {
double rowEntropy = entropy(k11, k12) + entropy(k21, k22);
double columnEntropy = entropy(k11, k21) + entropy(k12, k22);
double matrixEntropy = entropy(k11, k12, k21, k22);
- return 2 * (matrixEntropy - rowEntropy - columnEntropy);
+ if (rowEntropy + columnEntropy > matrixEntropy) {
+ // round off error
+ return 0.0;
+ }
+ return 2.0 * (matrixEntropy - rowEntropy - columnEntropy);
}
/**
@@ -88,6 +91,10 @@ public class LogLikelihood {
*/
public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int k22) {
double llr = logLikelihoodRatio(k11, k12, k21, k22);
- return Math.signum(((double) k11 / (k11+k12)) - ((double) k21 / (k21+k22))) * Math.sqrt(llr);
+ double sqrt = Math.sqrt(llr);
+ if (((double) k11 / (k11+k12)) < ((double) k21 / (k21+k22))) {
+ sqrt = -sqrt;
+ }
+ return sqrt;
}
}
Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=939501&r1=939500&r2=939501&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Thu Apr 29 22:35:52 2010
@@ -25,6 +25,7 @@ public class LogLikelihoodTest extends A
public void testEntropy() throws Exception {
assertEquals(1.386294, LogLikelihood.entropy(1, 1), 0.0001);
+ assertEquals(0.0, LogLikelihood.entropy(1), 0.0);
//TODO: more tests here
try {
LogLikelihood.entropy(-1, -1);//exception
@@ -37,9 +38,9 @@ public class LogLikelihoodTest extends A
@Test
public void testLogLikelihood() throws Exception {
//TODO: check the epsilons
- assertEquals(2.772589, LogLikelihood.logLikelihoodRatio(1, 0, 0, 1), 0.0001);
- assertEquals(27.72589, LogLikelihood.logLikelihoodRatio(10, 0, 0, 10), 0.0001);
- assertEquals(39.33052, LogLikelihood.logLikelihoodRatio(5, 1995, 0, 100000), 0.0001);
+ assertEquals(2.772589, LogLikelihood.logLikelihoodRatio(1, 0, 0, 1), 0.000001);
+ assertEquals(27.72589, LogLikelihood.logLikelihoodRatio(10, 0, 0, 10), 0.00001);
+ assertEquals(39.33052, LogLikelihood.logLikelihoodRatio(5, 1995, 0, 100000), 0.00001);
assertEquals(4730.737, LogLikelihood.logLikelihoodRatio(1000, 1995, 1000, 100000), 0.001);
assertEquals(5734.343, LogLikelihood.logLikelihoodRatio(1000, 1000, 1000, 100000), 0.001);
assertEquals(5714.932, LogLikelihood.logLikelihoodRatio(1000, 1000, 1000, 99000), 0.001);
@@ -53,4 +54,9 @@ public class LogLikelihoodTest extends A
// negative because k11 is lower than expected
assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) < 0.0);
}
+
+ @Test
+ public void testRootNegativeLLR() {
+ assertEquals(0.0, LogLikelihood.rootLogLikelihoodRatio(6, 7567, 1924, 2426487), 0.00000001);
+ }
}