You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by to...@apache.org on 2012/11/21 10:43:23 UTC

svn commit: r1412031 - in /hama/trunk: examples/src/test/java/org/apache/hama/examples/ examples/src/test/resources/ ml/src/main/java/org/apache/hama/ml/regression/ ml/src/test/java/org/apache/hama/ml/regression/

Author: tommaso
Date: Wed Nov 21 09:43:22 2012
New Revision: 1412031

URL: http://svn.apache.org/viewvc?rev=1412031&view=rev
Log:
[HAMA-668] - let LogisticRegressionModel use BigDecimals internally to avoid number instabilities

Added:
    hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java   (contents, props changed)
      - copied, changed from r1411328, hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java
    hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java   (with props)
    hama/trunk/examples/src/test/resources/linear_regression_sample.txt   (contents, props changed)
      - copied, changed from r1411328, hama/trunk/examples/src/test/resources/gd_file_sample.txt
    hama/trunk/examples/src/test/resources/logistic_regression_sample.txt   (with props)
Removed:
    hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java
    hama/trunk/examples/src/test/resources/gd_file_sample.txt
Modified:
    hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
    hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java

Copied: hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java (from r1411328, hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java)
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java?p2=hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java&p1=hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java&r1=1411328&r2=1412031&rev=1412031&view=diff
==============================================================================
--- hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java (original)
+++ hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java Wed Nov 21 09:43:22 2012
@@ -22,12 +22,12 @@ import org.junit.Test;
 import static org.junit.Assert.fail;
 
 /**
- * Testcase for {@link GradientDescentExample}
+ * Testcase for {@link GradientDescentExample} for 'linear regression'
  */
-public class GradientDescentTest {
+public class LinearRegressionTest {
   @Test
   public void testCorrectGDWithLinearRegressionExecution() throws Exception {
-    GradientDescentExample.main(new String[]{"src/test/resources/gd_file_sample.txt"});
+    GradientDescentExample.main(new String[]{"src/test/resources/linear_regression_sample.txt"});
   }
 
   @Test

Propchange: hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java?rev=1412031&view=auto
==============================================================================
--- hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java (added)
+++ hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java Wed Nov 21 09:43:22 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.examples;
+
+import org.junit.Test;
+
+/**
+ * Smoke test for {@link GradientDescentExample}: runs main() on the sample file with "logistic" as second argument and fails only if an exception escapes
+ */
+public class LogisticRegressionTest {
+
+  @Test
+  public void testCorrectGDWithLogisticRegressionExecution() throws Exception {
+    GradientDescentExample.main(new String[]{"src/test/resources/logistic_regression_sample.txt", "logistic"});
+  }
+}

Propchange: hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: hama/trunk/examples/src/test/resources/linear_regression_sample.txt (from r1411328, hama/trunk/examples/src/test/resources/gd_file_sample.txt)
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/resources/linear_regression_sample.txt?p2=hama/trunk/examples/src/test/resources/linear_regression_sample.txt&p1=hama/trunk/examples/src/test/resources/gd_file_sample.txt&r1=1411328&r2=1412031&rev=1412031&view=diff
==============================================================================
    (empty)

Propchange: hama/trunk/examples/src/test/resources/linear_regression_sample.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hama/trunk/examples/src/test/resources/logistic_regression_sample.txt
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/resources/logistic_regression_sample.txt?rev=1412031&view=auto
==============================================================================
--- hama/trunk/examples/src/test/resources/logistic_regression_sample.txt (added)
+++ hama/trunk/examples/src/test/resources/logistic_regression_sample.txt Wed Nov 21 09:43:22 2012
@@ -0,0 +1,6 @@
+2>1 9 2 4 5 6 7
+1>3 4 5 6 9 1 3
+1>1 1 3 1 1 1 1
+2>2 4 1 1 4 1 8
+1>3 4 5 6 7 8 9
+1>1 3 4 1 4 5 1

Propchange: hama/trunk/examples/src/test/resources/logistic_regression_sample.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java?rev=1412031&r1=1412030&r2=1412031&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java Wed Nov 21 09:43:22 2012
@@ -17,6 +17,9 @@
  */
 package org.apache.hama.ml.regression;
 
+import java.math.BigDecimal;
+import java.math.MathContext;
+
 import org.apache.hama.ml.math.DoubleVector;
 
 /**
@@ -30,20 +33,42 @@ public class LogisticRegressionModel imp
     costFunction = new CostFunction() {
       @Override
       public double calculateCostForItem(DoubleVector x, double y, int m, DoubleVector theta, HypothesisFunction hypothesis) {
-          double h = applyHypothesis(theta, x);
-          assert h > 0 && h < 1 : new StringBuilder("cannot calculate the log of a number equal to / less than zero [h:").
-                  append(h).append(" in log(h) and log(1-h)]").toString();
-          return (-1 * y * Math.log(h) + (1 - y) * Math.log(1 - h)) / m;
+          return -(y * ln(applyHypothesisWithPrecision(theta, x)).doubleValue() + (1d - y) * ln(BigDecimal.ONE.subtract(applyHypothesisWithPrecision(theta, x))).doubleValue()) / m; // cross-entropy: -(y*ln(h) + (1-y)*ln(1-h)) / m; ln needs 1-h (positive), not h-1
       }
     };
   }
-
   @Override
   public double applyHypothesis(DoubleVector theta, DoubleVector x) {
-    return 1d / (1d + Math.exp(-1 * theta.dot(x)));
+      return applyHypothesisWithPrecision(theta, x).doubleValue();
   }
 
-  @Override
+  private BigDecimal applyHypothesisWithPrecision(DoubleVector theta, DoubleVector x) {
+    BigDecimal denominator = BigDecimal.valueOf(1d).add(BigDecimal.valueOf(Math.exp(-1d * theta.dot(x)))); // 1 + e^(-theta.x)
+    return BigDecimal.valueOf(1).divide(denominator, MathContext.DECIMAL128); // sigmoid, quotient rounded per DECIMAL128
+  }
+
+  /**
+   * Natural logarithm of x, evaluated as ln(1 + z) with z = x - 1 through a truncated
+   * continued fraction in DECIMAL128 arithmetic; no check is made that x is positive.
+   */
+  private BigDecimal ln(BigDecimal x) {
+    // compareTo ignores scale, so 1, 1.0 and 1.00 all take the shortcut (equals matches only scale 0)
+    if (x.compareTo(BigDecimal.ONE) == 0) {
+      return BigDecimal.ZERO;
+    }
+    BigDecimal z = x.subtract(BigDecimal.ONE);
+    int iterations = 1000;
+    // z / (1 + 1^2 z / (2 + 1^2 z / (3 + 2^2 z / (4 + ...)))), built from the innermost level outwards
+    BigDecimal ret = new BigDecimal(iterations + 1);
+    for (long i = iterations; i >= 0; i--) {
+      BigDecimal numerator = new BigDecimal(i / 2 + 1).pow(2).multiply(z, MathContext.DECIMAL128);
+      ret = numerator.divide(ret, MathContext.DECIMAL128).add(new BigDecimal(i + 1), MathContext.DECIMAL128);
+    }
+    return z.divide(ret, MathContext.DECIMAL128);
+  }
+
+
+    @Override
   public double calculateCostForItem(DoubleVector x, double y, int m, DoubleVector theta) {
     return costFunction.calculateCostForItem(x, y, m, theta, this);
   }

Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java?rev=1412031&r1=1412030&r2=1412031&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java Wed Nov 21 09:43:22 2012
@@ -35,7 +35,7 @@ public class LogisticRegressionModelTest
     double y = 1;
     DoubleVector theta = new DenseDoubleVector(new double[]{1, 1, 1});
     Double cost = logisticRegressionModel.calculateCostForItem(x, y, 2, theta);
-    assertEquals("wrong cost calculation for logistic regression", Double.valueOf(6.17010948616701E-5), cost);
+    assertEquals("wrong cost calculation for logistic regression", Double.valueOf(6.170109486162941E-5), cost);
   }
 
   @Test
@@ -43,6 +43,6 @@ public class LogisticRegressionModelTest
     LogisticRegressionModel logisticRegressionModel = new LogisticRegressionModel();
     Double hypothesisValue = logisticRegressionModel.applyHypothesis(new DenseDoubleVector(new double[]{1, 1, 1}),
             new DenseDoubleVector(new double[]{2, 3, 4}));
-    assertEquals("wrong hypothesis value for logistic regression", Double.valueOf(0.9998766054240137), hypothesisValue);
+    assertEquals("wrong hypothesis value for logistic regression", Double.valueOf(0.9998766054240138), hypothesisValue);
   }
 }