You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2010/08/22 15:13:36 UTC
svn commit: r987897 - in /commons/proper/math/trunk/src:
main/java/org/apache/commons/math/stat/regression/ site/xdoc/ test/R/
test/java/org/apache/commons/math/stat/regression/
Author: psteitz
Date: Sun Aug 22 13:13:35 2010
New Revision: 987897
URL: http://svn.apache.org/viewvc?rev=987897&view=rev
Log:
Corrected Y variance formula and added error variance methods to return
what were previously reported as Y variances.
JIRA: MATH-392
Reported and patched by Mark Devaney
Modified:
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
commons/proper/math/trunk/src/site/xdoc/changes.xml
commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java Sun Aug 22 13:13:35 2010
@@ -22,6 +22,7 @@ import org.apache.commons.math.linear.Re
import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.RealVector;
import org.apache.commons.math.linear.ArrayRealVector;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
/**
* Abstract base class for implementations of MultipleLinearRegression.
@@ -148,7 +149,8 @@ public abstract class AbstractMultipleLi
*/
public double[] estimateRegressionParametersStandardErrors() {
double[][] betaVariance = estimateRegressionParametersVariance();
- double sigma = calculateYVariance();
+ RealVector residuals = calculateResiduals();
+ double sigma = calculateErrorVariance();
int length = betaVariance[0].length;
double[] result = new double[length];
for (int i = 0; i < length; i++) {
@@ -165,6 +167,25 @@ public abstract class AbstractMultipleLi
}
/**
+ * Estimates the variance of the error.
+ *
+ * @return estimate of the error variance
+ */
+ public double estimateErrorVariance() {
+ return calculateErrorVariance();
+
+ }
+
+ /**
+ * Estimates the standard error of the regression.
+ *
+ * @return regression standard error
+ */
+ public double estimateRegressionStandardError() {
+ return Math.sqrt(estimateErrorVariance());
+ }
+
+ /**
* Calculates the beta of multiple linear regression in matrix notation.
*
* @return beta
@@ -179,12 +200,31 @@ public abstract class AbstractMultipleLi
*/
protected abstract RealMatrix calculateBetaVariance();
+
/**
- * Calculates the Y variance of multiple linear regression.
+ * Calculates the variance of the y values.
*
* @return Y variance
*/
- protected abstract double calculateYVariance();
+ protected double calculateYVariance() {
+ return new Variance().evaluate(Y.getData());
+ }
+
+ /**
+ * <p>Calculates the variance of the error term.</p>
+ * Uses the formula <pre>
+ * var(u) = u · u / (n - k)
+ * </pre>
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance estimate
+ */
+ protected double calculateErrorVariance() {
+ RealVector residuals = calculateResiduals();
+ return residuals.dotProduct(residuals) /
+ (X.getRowDimension() - X.getColumnDimension());
+ }
/**
* Calculates the residuals of multiple linear regression in matrix
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java Sun Aug 22 13:13:35 2010
@@ -21,7 +21,6 @@ import org.apache.commons.math.linear.Re
import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.RealVector;
-
/**
* The GLS implementation of the multiple linear regression.
*
@@ -101,7 +100,7 @@ public class GLSMultipleLinearRegression
}
/**
- * Calculates the variance on the beta by GLS.
+ * Calculates the variance on the beta.
* <pre>
* Var(b)=(X' Omega^-1 X)^-1
* </pre>
@@ -114,18 +113,23 @@ public class GLSMultipleLinearRegression
return new LUDecompositionImpl(XTOIX).getSolver().getInverse();
}
+
/**
- * Calculates the variance on the y by GLS.
+ * Calculates the estimated variance of the error term using the formula
* <pre>
- * Var(y)=Tr(u' Omega^-1 u)/(n-k)
+ * Var(u) = Tr(u' Omega^-1 u)/(n-k)
* </pre>
- * @return The Y variance
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance
*/
@Override
- protected double calculateYVariance() {
+ protected double calculateErrorVariance() {
RealVector residuals = calculateResiduals();
double t = residuals.dotProduct(getOmegaInverse().operate(residuals));
return t / (X.getRowDimension() - X.getColumnDimension());
+
}
}
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java Sun Aug 22 13:13:35 2010
@@ -49,7 +49,7 @@ import org.apache.commons.math.linear.Re
* (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> R b = (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> Q<sup>T</sup> y <br/>
* R b = Q<sup>T</sup> y
* </p>
- * Given Q and R, the last equation is solved by back-subsitution.</p>
+ * Given Q and R, the last equation is solved by back-substitution.</p>
*
* @version $Revision$ $Date$
* @since 2.0
@@ -161,19 +161,4 @@ public class OLSMultipleLinearRegression
return Rinv.multiply(Rinv.transpose());
}
-
- /**
- * <p>Calculates the variance on the Y by OLS.
- * </p>
- * <p> Var(y) = Tr(u<sup>T</sup>u)/(n - k)
- * </p>
- * @return The Y variance
- */
- @Override
- protected double calculateYVariance() {
- RealVector residuals = calculateResiduals();
- return residuals.dotProduct(residuals) /
- (X.getRowDimension() - X.getColumnDimension());
- }
-
}
Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Sun Aug 22 13:13:35 2010
@@ -52,6 +52,13 @@ The <action> type attribute can be add,u
If the output is not quite correct, check for invisible trailing spaces!
-->
<release version="2.2" date="TBD" description="TBD">
+ <action dev="psteitz" type="fix" issue="MATH-392" due-to="Mark Devaney">
+ Corrected the formula used for Y variance returned by calculateYVariance and associated
+ methods in multiple regression classes (AbstractMultipleLinearRegression,
+ OLSMultipleLinearRegression, GLSMultipleLinearRegression). These methods previously returned
+ estimates of the variance in the model error term. New "calulateErrorVariance" methods have
+ been added to compute what was previously returned by calculateYVariance.
+ </action>
<action dev="dimpbx" type="fix" issue="MATH-406">
Bug fixed in Levenberg-Marquardt (handling of weights).
</action>
Modified: commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases (original)
+++ commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases Sun Aug 22 13:13:35 2010
@@ -32,10 +32,11 @@ options(digits=16) # override
# function to verify OLS computations
verifyRegression <- function(model, expectedBeta, expectedResiduals,
- expectedErrors, modelName) {
+ expectedErrors, expectedStdError, modelName) {
betaHat <- as.vector(coefficients(model))
residuals <- as.vector(residuals(model))
errors <- as.vector(as.matrix(coefficients(summary(model)))[,2])
+ stdError <- summary(model)$sigma
output <- c("Parameter test dataset = ", modelName)
if (assertEquals(expectedBeta,betaHat,tol,"Parameters")) {
displayPadded(output, SUCCEEDED, WIDTH)
@@ -53,7 +54,13 @@ verifyRegression <- function(model, expe
displayPadded(output, SUCCEEDED, WIDTH)
} else {
displayPadded(output, FAILED, WIDTH)
- }
+ }
+ output <- c("Standard Error test dataset = ", modelName)
+ if (assertEquals(expectedStdError,stdError,tol,"Regression Standard Error")) {
+ displayPadded(output, SUCCEEDED, WIDTH)
+ } else {
+ displayPadded(output, FAILED, WIDTH)
+ }
}
#--------------------------------------------------------------------------
@@ -70,7 +77,8 @@ model <- lm(y ~ x1 + x2 + x3 + x4 + x5)
expectedBeta <- c(11.0,0.5,0.666666666666667,0.75,0.8,0.8333333333333333)
expectedResiduals <- c(0,0,0,0,0,0)
expectedErrors <- c(NaN,NaN,NaN,NaN,NaN,NaN)
-verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors,
+expectedStdError <- NaN
+verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError,
"perfect fit")
# Longly
@@ -125,8 +133,9 @@ expectedResiduals <- c( 267.340029759711
-13.18035686637081,14.30477260005235,455.394094551857,-17.26892711483297,
-39.0550425226967,-155.5499735953195,-85.6713080421283,341.9315139607727,
-206.7578251937366)
+expectedStdError <- 304.8540735619638
-verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors,
+verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError,
"Longly")
# Swiss Fertility (R dataset named "swiss")
@@ -215,7 +224,9 @@ expectedResiduals <- c(7.104426785973051
15.0147574652763112,4.8625103516321015,-7.1597256413907706,
-0.4515205619767598,-10.2916870903837587,-15.7812984571900063)
-verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors,
+expectedStdError <- 7.73642194433223
+
+verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError,
"Swiss Fertility")
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java Sun Aug 22 13:13:35 2010
@@ -18,6 +18,8 @@ package org.apache.commons.math.stat.reg
import org.junit.Before;
import org.junit.Test;
+import org.apache.commons.math.TestUtils;
+import org.apache.commons.math.stat.StatUtils;
public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
@@ -121,4 +123,16 @@ public class GLSMultipleLinearRegression
return y.length;
}
+ /**
+ * test calculateYVariance
+ */
+ @Test
+ public void testYVariance() {
+
+ // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
+
+ GLSMultipleLinearRegression model = new GLSMultipleLinearRegression();
+ model.newSampleData(y, x, omega);
+ TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
+ }
}
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java?rev=987897&r1=987896&r2=987897&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java Sun Aug 22 13:13:35 2010
@@ -24,6 +24,7 @@ import org.apache.commons.math.linear.Ma
import org.apache.commons.math.linear.MatrixVisitorException;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.stat.StatUtils;
import org.junit.Before;
import org.junit.Test;
@@ -82,7 +83,7 @@ public class OLSMultipleLinearRegression
}
@Test
- public void testPerfectFit() {
+ public void testPerfectFit() throws Exception {
double[] betaHat = regression.estimateRegressionParameters();
TestUtils.assertEquals(betaHat,
new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
@@ -108,6 +109,7 @@ public class OLSMultipleLinearRegression
assertEquals(0.0,
errors.subtract(referenceVariance).getNorm(),
5.0e-16 * referenceVariance.getNorm());
+
}
@@ -122,7 +124,7 @@ public class OLSMultipleLinearRegression
* http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
*/
@Test
- public void testLongly() {
+ public void testLongly() throws Exception {
// Y values are first, then independent vars
// Each row is one observation
double[] design = new double[] {
@@ -180,6 +182,11 @@ public class OLSMultipleLinearRegression
0.214274163161675,
0.226073200069370,
455.478499142212}, errors, 1E-6);
+
+ // Check regression standard error against R
+ assertEquals(304.8540735619638, model.estimateRegressionStandardError(), 1E-10);
+
+ checkVarianceConsistency(model);
}
/**
@@ -187,7 +194,7 @@ public class OLSMultipleLinearRegression
* Data Source: R datasets package
*/
@Test
- public void testSwissFertility() {
+ public void testSwissFertility() throws Exception {
double[] design = new double[] {
80.2,17.0,15,12,9.96,
83.1,45.1,6,9,84.84,
@@ -283,6 +290,11 @@ public class OLSMultipleLinearRegression
0.27410957467466,
0.19454551679325,
0.03726654773803}, errors, 1E-10);
+
+ // Check regression standard error against R
+ assertEquals(7.73642194433223, model.estimateRegressionStandardError(), 1E-12);
+
+ checkVarianceConsistency(model);
}
/**
@@ -354,4 +366,34 @@ public class OLSMultipleLinearRegression
double[] hatResiduals = I.subtract(hat).operate(model.Y).getData();
TestUtils.assertEquals(residuals, hatResiduals, 10e-12);
}
+
+ /**
+ * test calculateYVariance
+ */
+ @Test
+ public void testYVariance() {
+
+ // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
+
+ OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
+ model.newSampleData(y, x);
+ TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
+ }
+
+ /**
+ * Verifies that calculateYVariance and calculateResidualVariance return consistent
+ * values with direct variance computation from Y, residuals, respectively.
+ */
+ protected void checkVarianceConsistency(OLSMultipleLinearRegression model) throws Exception {
+ // Check Y variance consistency
+ TestUtils.assertEquals(StatUtils.variance(model.Y.getData()), model.calculateYVariance(), 0);
+
+ // Check residual variance consistency
+ double[] residuals = model.calculateResiduals().getData();
+ RealMatrix X = model.X;
+ TestUtils.assertEquals(
+ StatUtils.variance(model.calculateResiduals().getData()) * (residuals.length - 1),
+ model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20);
+
+ }
}