You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by to...@apache.org on 2012/11/21 10:43:23 UTC
svn commit: r1412031 - in /hama/trunk:
examples/src/test/java/org/apache/hama/examples/ examples/src/test/resources/
ml/src/main/java/org/apache/hama/ml/regression/
ml/src/test/java/org/apache/hama/ml/regression/
Author: tommaso
Date: Wed Nov 21 09:43:22 2012
New Revision: 1412031
URL: http://svn.apache.org/viewvc?rev=1412031&view=rev
Log:
[HAMA-668] - let LogisticRegressionModel use BigDecimals internally to avoid numerical instabilities
Added:
hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java (contents, props changed)
- copied, changed from r1411328, hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java
hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java (with props)
hama/trunk/examples/src/test/resources/linear_regression_sample.txt (contents, props changed)
- copied, changed from r1411328, hama/trunk/examples/src/test/resources/gd_file_sample.txt
hama/trunk/examples/src/test/resources/logistic_regression_sample.txt (with props)
Removed:
hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java
hama/trunk/examples/src/test/resources/gd_file_sample.txt
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java
Copied: hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java (from r1411328, hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java)
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java?p2=hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java&p1=hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java&r1=1411328&r2=1412031&rev=1412031&view=diff
==============================================================================
--- hama/trunk/examples/src/test/java/org/apache/hama/examples/GradientDescentTest.java (original)
+++ hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java Wed Nov 21 09:43:22 2012
@@ -22,12 +22,12 @@ import org.junit.Test;
import static org.junit.Assert.fail;
/**
- * Testcase for {@link GradientDescentExample}
+ * Testcase for {@link GradientDescentExample} for 'linear regression'
*/
-public class GradientDescentTest {
+public class LinearRegressionTest {
@Test
public void testCorrectGDWithLinearRegressionExecution() throws Exception {
- GradientDescentExample.main(new String[]{"src/test/resources/gd_file_sample.txt"});
+ GradientDescentExample.main(new String[]{"src/test/resources/linear_regression_sample.txt"});
}
@Test
Propchange: hama/trunk/examples/src/test/java/org/apache/hama/examples/LinearRegressionTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java?rev=1412031&view=auto
==============================================================================
--- hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java (added)
+++ hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java Wed Nov 21 09:43:22 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.examples;
+
+import org.junit.Test;
+
+/**
+ * Testcase for {@link GradientDescentExample} execution for 'logistic regression'
+ */
+public class LogisticRegressionTest {
+
+ @Test
+ public void testCorrectGDWithLogisticRegressionExecution() throws Exception {
+ GradientDescentExample.main(new String[]{"src/test/resources/logistic_regression_sample.txt", "logistic"});
+ }
+}
Propchange: hama/trunk/examples/src/test/java/org/apache/hama/examples/LogisticRegressionTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: hama/trunk/examples/src/test/resources/linear_regression_sample.txt (from r1411328, hama/trunk/examples/src/test/resources/gd_file_sample.txt)
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/resources/linear_regression_sample.txt?p2=hama/trunk/examples/src/test/resources/linear_regression_sample.txt&p1=hama/trunk/examples/src/test/resources/gd_file_sample.txt&r1=1411328&r2=1412031&rev=1412031&view=diff
==============================================================================
(empty)
Propchange: hama/trunk/examples/src/test/resources/linear_regression_sample.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: hama/trunk/examples/src/test/resources/logistic_regression_sample.txt
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/test/resources/logistic_regression_sample.txt?rev=1412031&view=auto
==============================================================================
--- hama/trunk/examples/src/test/resources/logistic_regression_sample.txt (added)
+++ hama/trunk/examples/src/test/resources/logistic_regression_sample.txt Wed Nov 21 09:43:22 2012
@@ -0,0 +1,6 @@
+2>1 9 2 4 5 6 7
+1>3 4 5 6 9 1 3
+1>1 1 3 1 1 1 1
+2>2 4 1 1 4 1 8
+1>3 4 5 6 7 8 9
+1>1 3 4 1 4 5 1
Propchange: hama/trunk/examples/src/test/resources/logistic_regression_sample.txt
------------------------------------------------------------------------------
svn:eol-style = native
Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java?rev=1412031&r1=1412030&r2=1412031&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java Wed Nov 21 09:43:22 2012
@@ -17,6 +17,9 @@
*/
package org.apache.hama.ml.regression;
+import java.math.BigDecimal;
+import java.math.MathContext;
+
import org.apache.hama.ml.math.DoubleVector;
/**
@@ -30,20 +33,42 @@ public class LogisticRegressionModel imp
costFunction = new CostFunction() {
@Override
public double calculateCostForItem(DoubleVector x, double y, int m, DoubleVector theta, HypothesisFunction hypothesis) {
- double h = applyHypothesis(theta, x);
- assert h > 0 && h < 1 : new StringBuilder("cannot calculate the log of a number equal to / less than zero [h:").
- append(h).append(" in log(h) and log(1-h)]").toString();
- return (-1 * y * Math.log(h) + (1 - y) * Math.log(1 - h)) / m;
+ return (-1d * y * ln(applyHypothesisWithPrecision(theta, x)).doubleValue() + (1d - y) * ln(applyHypothesisWithPrecision(theta, x).subtract(BigDecimal.valueOf(1))).doubleValue()) / m;
}
};
}
-
@Override
public double applyHypothesis(DoubleVector theta, DoubleVector x) {
- return 1d / (1d + Math.exp(-1 * theta.dot(x)));
+ return applyHypothesisWithPrecision(theta, x).doubleValue();
}
- @Override
+ private BigDecimal applyHypothesisWithPrecision(DoubleVector theta, DoubleVector x) {
+ return BigDecimal.valueOf(1).divide(BigDecimal.valueOf(1d).add(BigDecimal.valueOf(Math.exp(-1d * theta.dot(x)))),
+ MathContext.DECIMAL128);
+ }
+
+ private BigDecimal ln(BigDecimal x) {
+ if (x.equals(BigDecimal.ONE)) {
+ return BigDecimal.ZERO;
+ }
+ x = x.subtract(BigDecimal.ONE);
+ int iterations = 1000;
+ BigDecimal ret = new BigDecimal(iterations + 1);
+ for (long i = iterations; i >= 0; i--) {
+ BigDecimal N = new BigDecimal(i / 2 + 1).pow(2);
+ N = N.multiply(x, MathContext.DECIMAL128);
+ ret = N.divide(ret, MathContext.DECIMAL128);
+
+ N = new BigDecimal(i + 1);
+ ret = ret.add(N, MathContext.DECIMAL128);
+
+ }
+ ret = x.divide(ret, MathContext.DECIMAL128);
+ return ret;
+ }
+
+
+ @Override
public double calculateCostForItem(DoubleVector x, double y, int m, DoubleVector theta) {
return costFunction.calculateCostForItem(x, y, m, theta, this);
}
Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java?rev=1412031&r1=1412030&r2=1412031&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/LogisticRegressionModelTest.java Wed Nov 21 09:43:22 2012
@@ -35,7 +35,7 @@ public class LogisticRegressionModelTest
double y = 1;
DoubleVector theta = new DenseDoubleVector(new double[]{1, 1, 1});
Double cost = logisticRegressionModel.calculateCostForItem(x, y, 2, theta);
- assertEquals("wrong cost calculation for logistic regression", Double.valueOf(6.17010948616701E-5), cost);
+ assertEquals("wrong cost calculation for logistic regression", Double.valueOf(6.170109486162941E-5), cost);
}
@Test
@@ -43,6 +43,6 @@ public class LogisticRegressionModelTest
LogisticRegressionModel logisticRegressionModel = new LogisticRegressionModel();
Double hypothesisValue = logisticRegressionModel.applyHypothesis(new DenseDoubleVector(new double[]{1, 1, 1}),
new DenseDoubleVector(new double[]{2, 3, 4}));
- assertEquals("wrong hypothesis value for logistic regression", Double.valueOf(0.9998766054240137), hypothesisValue);
+ assertEquals("wrong hypothesis value for logistic regression", Double.valueOf(0.9998766054240138), hypothesisValue);
}
}