You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2010/09/08 03:24:52 UTC
svn commit: r993574 - in /commons/proper/math:
branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/
branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/
branches/MATH_2_X/src/main/resources/META-INF/localization/ ...
Author: psteitz
Date: Wed Sep 8 01:24:51 2010
New Revision: 993574
URL: http://svn.apache.org/viewvc?rev=993574&view=rev
Log:
* Modified multiple regression newSample methods to ensure that by default in all cases,
regression models are estimated with intercept terms. Prior to the fix for this issue,
newXSampleData(double[][]), newSampleData(double[], double[][]) and
newSampleData(double[], double[][], double[][]) all required columns of "1's"
to be inserted into the x[][] arrays to create a model with an intercept term;
while newSampleData(double[], int, int) created a model including an intercept
term without requiring the unitary column. All methods have been changed to
eliminate the need for users to add unitary columns to specify regression models.
* Improved javadoc
* Improved tests
JIRA: MATH-411
Modified:
commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java
commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
commons/proper/math/branches/MATH_2_X/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties
commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml
commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java
commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
commons/proper/math/trunk/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties
commons/proper/math/trunk/src/site/xdoc/changes.xml
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
Modified: commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java Wed Sep 8 01:24:51 2010
@@ -126,6 +126,7 @@ public enum LocalizedFormats implements
INVALID_INTERVAL_INITIAL_VALUE_PARAMETERS("invalid interval, initial value parameters: lower={0}, initial={1}, upper={2}"),
INVALID_ITERATIONS_LIMITS("invalid iteration limits: min={0}, max={1}"),
INVALID_MAX_ITERATIONS("bad value for maximum iterations number: {0}"),
+ INVALID_REGRESSION_ARRAY("input data array length = {0} does not match the number of observations = {1} and the number of regressors = {2}"),
INVALID_ROUNDING_METHOD("invalid rounding method {0}, valid methods: {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})"),
ITERATOR_EXHAUSTED("iterator exhausted"),
LCM_OVERFLOW_32_BITS("overflow: lcm({0}, {1}) is 2^31"),
Modified: commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java Wed Sep 8 01:24:51 2010
@@ -40,14 +40,47 @@ public abstract class AbstractMultipleLi
protected RealVector Y;
/**
- * Loads model x and y sample data from a flat array of data, overriding any previous sample.
- * Assumes that rows are concatenated with y values first in each row.
+ * <p>Loads model x and y sample data from a flat input array, overriding any previous sample.
+ * </p>
+ * <p>Assumes that rows are concatenated with y values first in each row. For example, an input
+ * <code>data</code> array containing the sequence of values (1, 2, 3, 4, 5, 6, 7, 8, 9) with
+ * <code>nobs = 3</code> and <code>nvars = 2</code> creates a regression dataset with two
+ * independent variables, as below:
+ * <pre>
+ * y x[0] x[1]
+ * --------------
+ * 1 2 3
+ * 4 5 6
+ * 7 8 9
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * <p>Throws IllegalArgumentException if any of the following preconditions fail:
+ * <ul><li><code>data</code> cannot be null</li>
+ * <li><code>data.length = nobs * (nvars + 1)</code></li>
+ * <li><code>nobs > nvars</code></li></ul>
+ * </p>
*
* @param data input data array
* @param nobs number of observations (rows)
* @param nvars number of independent variables (columns, not counting y)
+ * @throws IllegalArgumentException if the preconditions are not met
*/
public void newSampleData(double[] data, int nobs, int nvars) {
+ if (data == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (data.length != nobs * (nvars + 1)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INVALID_REGRESSION_ARRAY, data.length, nobs, nvars);
+ }
+ if (nobs <= nvars) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS);
+ }
double[] y = new double[nobs];
double[][] x = new double[nobs][nvars + 1];
int pointer = 0;
@@ -63,30 +96,82 @@ public abstract class AbstractMultipleLi
}
/**
- * Loads new y sample data, overriding any previous sample
+ * Loads new y sample data, overriding any previous data.
*
- * @param y the [n,1] array representing the y sample
+ * @param y the array representing the y sample
+ * @throws IllegalArgumentException if y is null or empty
*/
protected void newYSampleData(double[] y) {
+ if (y == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (y.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
this.Y = new ArrayRealVector(y);
}
/**
- * Loads new x sample data, overriding any previous sample
- *
- * @param x the [n,k] array representing the x sample
+ * <p>Loads new x sample data, overriding any previous data.
+ * </p>
+ * <p>The input <code>x</code> array should have one row for each sample
+ * observation, with columns corresponding to independent variables.
+ * For example, if <pre>
+ * <code> x = new double[][] {{1, 2}, {3, 4}, {5, 6}} </code></pre>
+ * then <code>newXSampleData(x) </code> results in a model with two independent
+ * variables and 3 observations:
+ * <pre>
+ * x[0] x[1]
+ * ----------
+ * 1 2
+ * 3 4
+ * 5 6
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * @param x the rectangular array representing the x sample
+ * @throws IllegalArgumentException if x is null, empty or not rectangular
*/
protected void newXSampleData(double[][] x) {
- this.X = new Array2DRowRealMatrix(x);
+ if (x == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (x.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ final int nVars = x[0].length;
+ final double[][] xAug = new double[x.length][nVars + 1];
+ for (int i = 0; i < x.length; i++) {
+ if (x[i].length != nVars) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
+ x[i].length, nVars);
+ }
+ xAug[i][0] = 1.0d;
+ System.arraycopy(x[i], 0, xAug[i], 1, nVars);
+ }
+ this.X = new Array2DRowRealMatrix(xAug, false);
}
/**
- * Validates sample data.
+ * Validates sample data. Checks that
+ * <ul><li>Neither x nor y is null or empty;</li>
+ * <li>The length (i.e. number of rows) of x equals the length of y</li>
+ * <li>x has at least one more row than it has columns (i.e. there is
+ * sufficient data to estimate regression coefficients for each of the
+ * columns in x plus an intercept).</li>
+ * </ul>
+ *
+ * @param x the [n,k] array representing the x data
+ * @param y the [n,1] array representing the y data
+ * @throws IllegalArgumentException if any of the checks fail
*
- * @param x the [n,k] array representing the x sample
- * @param y the [n,1] array representing the y sample
- * @throws IllegalArgumentException if the x and y array data are not
- * compatible for the regression
*/
protected void validateSampleData(double[][] x, double[] y) {
if ((x == null) || (y == null) || (x.length != y.length)) {
@@ -94,7 +179,12 @@ public abstract class AbstractMultipleLi
LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
(x == null) ? 0 : x.length,
(y == null) ? 0 : y.length);
- } else if ((x.length > 0) && (x[0].length > x.length)) {
+ }
+ if (x.length == 0) { // Must be no y data either
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ if (x[0].length + 1 > x.length) {
throw MathRuntimeException.createIllegalArgumentException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
x.length, x[0].length);
@@ -102,12 +192,13 @@ public abstract class AbstractMultipleLi
}
/**
- * Validates sample data.
+ * Validates that the x data and covariance matrix have the same
+ * number of rows and that the covariance matrix is square.
*
* @param x the [n,k] array representing the x sample
* @param covariance the [n,n] array representing the covariance matrix
- * @throws IllegalArgumentException if the x sample data or covariance
- * matrix are not compatible for the regression
+ * @throws IllegalArgumentException if the number of rows in x is not equal
+ * to the number of rows in covariance or covariance is not square.
*/
protected void validateCovarianceData(double[][] x, double[][] covariance) {
if (x.length != covariance.length) {
Modified: commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java Wed Sep 8 01:24:51 2010
@@ -77,8 +77,7 @@ public class OLSMultipleLinearRegression
/**
* {@inheritDoc}
- *
- * Computes and caches QR decomposition of the X matrix
+ * <p>This implementation computes and caches the QR decomposition of the X matrix.</p>
*/
@Override
public void newSampleData(double[] data, int nobs, int nvars) {
@@ -132,7 +131,7 @@ public class OLSMultipleLinearRegression
}
/**
- * Returns the sum of square residuals.
+ * Returns the sum of squared residuals.
*
* @return residual sum of squares
*/
@@ -168,22 +167,20 @@ public class OLSMultipleLinearRegression
final double n = X.getRowDimension();
return 1 - (calculateResidualSumOfSquares() * (n - 1)) /
(calculateTotalSumOfSquares() * (n - X.getColumnDimension()));
- // return 1 - ((1 - calculateRSquare()) * (n - 1) / (n - X.getColumnDimension() - 1));
}
/**
- * Loads new x sample data, overriding any previous sample
- *
- * @param x the [n,k] array representing the x sample
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix once it is successfully loaded.</p>
*/
@Override
protected void newXSampleData(double[][] x) {
- this.X = new Array2DRowRealMatrix(x);
+ super.newXSampleData(x);
qr = new QRDecompositionImpl(X);
}
/**
- * Calculates regression coefficients using OLS.
+ * Calculates the regression coefficients using OLS.
*
* @return beta
*/
Modified: commons/proper/math/branches/MATH_2_X/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties (original)
+++ commons/proper/math/branches/MATH_2_X/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties Wed Sep 8 01:24:51 2010
@@ -98,6 +98,7 @@ INVALID_BRACKETING_PARAMETERS = param\u0
INVALID_INTERVAL_INITIAL_VALUE_PARAMETERS = param\u00e8tres de l''intervalle initial invalides : borne inf = {0}, valeur initiale = {1}, borne sup = {2}
INVALID_ITERATIONS_LIMITS = limites d''it\u00e9rations invalides : min = {0}, max = {1}
INVALID_MAX_ITERATIONS = valeur invalide pour le nombre maximal d''it\u00e9rations : {0}
+INVALID_REGRESSION_ARRAY= longueur du tableau de donn\u00e9es = {0} ne correspond pas au nombre d''observations = {1} et le nombre de variables explicatives = {2}
INVALID_ROUNDING_METHOD = m\u00e9thode d''arondi {0} invalide, m\u00e9thodes valides : {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})
ITERATOR_EXHAUSTED = it\u00e9ration achev\u00e9e
LCM_OVERFLOW_32_BITS = d\u00e9passement de capacit\u00e9 : le MCM de {0} et {1} vaut 2^31
Modified: commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml Wed Sep 8 01:24:51 2010
@@ -52,6 +52,19 @@ The <action> type attribute can be add,u
If the output is not quite correct, check for invisible trailing spaces!
-->
<release version="2.2" date="TBD" description="TBD">
+ <action dev="psteitz" type="fix" issue="MATH-411">
+ Modified multiple regression newSample methods to ensure that by default in all cases,
+ regression models are estimated with intercept terms. Prior to the fix for this issue,
+ newXSampleData(double[][]), newSampleData(double[], double[][]) and
+ newSampleData(double[], double[][], double[][]) all required columns of "1's" to be inserted
+ into the x[][] arrays to create a model with an intercept term; while newSampleData(double[], int, int)
+ created a model including an intercept term without requiring the unitary column. All methods have
+ been changed to eliminate the need for users to add unitary columns to specify regression models.
+ <!-- uncomment when MATH-409 is resolved (noIntercept option)
+ Users of OLSMultipleLinearRegression or GLSMultipleLinearRegression versions 2.0 or 2.1 should either
+ verify that their code does not use the first set of data loading methods above or set the noIntercept
+ property on estimated models to get the previous behavior. -->
+ </action>
<action dev="luc" type="fix" issue="MATH-412" due-to="Bill Rossi">
Added the dfp library providing arbitrary precision floating point computation in the spirit of
IEEE 854-1987 (not exactly as it uses base 1000 instead of base 10). In addition to finite numbers,
Modified: commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java Wed Sep 8 01:24:51 2010
@@ -16,10 +16,13 @@
*/
package org.apache.commons.math.stat.regression;
+import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Test;
import org.apache.commons.math.TestUtils;
-import org.apache.commons.math.stat.StatUtils;
+import org.apache.commons.math.linear.MatrixUtils;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealVector;
public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
@@ -32,12 +35,12 @@ public class GLSMultipleLinearRegression
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
- x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
- x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
- x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
- x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
- x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
- x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
+ x[0] = new double[]{0, 0, 0, 0, 0};
+ x[1] = new double[]{2.0, 0, 0, 0, 0};
+ x[2] = new double[]{0, 3.0, 0, 0, 0};
+ x[3] = new double[]{0, 0, 4.0, 0, 0};
+ x[4] = new double[]{0, 0, 0, 5.0, 0};
+ x[5] = new double[]{0, 0, 0, 0, 6.0};
omega = new double[6][];
omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
@@ -115,7 +118,7 @@ public class GLSMultipleLinearRegression
@Override
protected int getNumberOfRegressors() {
- return x[0].length;
+ return x[0].length + 1;
}
@Override
@@ -135,4 +138,69 @@ public class GLSMultipleLinearRegression
model.newSampleData(y, x, omega);
TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
}
+
+ /**
+ * Verifies that setting X, Y and covariance separately has the same effect as newSample(X,Y,cov).
+ */
+ @Test
+ public void testNewSample2() throws Exception {
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ double[][] covariance = MatrixUtils.createRealIdentityMatrix(4).scalarMultiply(2).getData();
+ GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
+ regression.newSampleData(y, x, covariance);
+ RealMatrix combinedX = regression.X.copy();
+ RealVector combinedY = regression.Y.copy();
+ RealMatrix combinedCovInv = regression.getOmegaInverse();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(combinedX, regression.X);
+ assertEquals(combinedY, regression.Y);
+ assertEquals(combinedCovInv, regression.getOmegaInverse());
+ }
+
+ /**
+ * Verifies that GLS with identity covariance matrix gives the same results
+ * as OLS.
+ */
+ @Test
+ public void testGLSOLSConsistency() throws Exception {
+ // Use Longley data to test
+ double[] design = new double[] {
+ 60323,83.0,234289,2356,1590,107608,1947,
+ 61122,88.5,259426,2325,1456,108632,1948,
+ 60171,88.2,258054,3682,1616,109773,1949,
+ 61187,89.5,284599,3351,1650,110929,1950,
+ 63221,96.2,328975,2099,3099,112075,1951,
+ 63639,98.1,346999,1932,3594,113270,1952,
+ 64989,99.0,365385,1870,3547,115094,1953,
+ 63761,100.0,363112,3578,3350,116219,1954,
+ 66019,101.2,397469,2904,3048,117388,1955,
+ 67857,104.6,419180,2822,2857,118734,1956,
+ 68169,108.4,442769,2936,2798,120445,1957,
+ 66513,110.8,444546,4681,2637,121950,1958,
+ 68655,112.6,482704,3813,2552,123366,1959,
+ 69564,114.2,502601,3931,2514,125368,1960,
+ 69331,115.7,518173,4806,2572,127852,1961,
+ 70551,116.9,554894,4007,2827,130081,1962
+ };
+ RealMatrix identityCov = MatrixUtils.createRealIdentityMatrix(16);
+ GLSMultipleLinearRegression glsModel = new GLSMultipleLinearRegression();
+ OLSMultipleLinearRegression olsModel = new OLSMultipleLinearRegression();
+ glsModel.newSampleData(design, 16, 6);
+ olsModel.newSampleData(design, 16, 6);
+ glsModel.newCovarianceData(identityCov.getData());
+ double[] olsBeta = olsModel.calculateBeta().getData();
+ double[] glsBeta = glsModel.calculateBeta().getData();
+ // TODO: Should have assertRelativelyEquals(double[], double[], eps) in TestUtils
+ // Should also add RealVector and RealMatrix versions
+ for (int i = 0; i < olsBeta.length; i++) {
+ TestUtils.assertRelativelyEquals(olsBeta[i], glsBeta[i], 10E-7);
+ }
+ }
}
Modified: commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java Wed Sep 8 01:24:51 2010
@@ -16,6 +16,8 @@
*/
package org.apache.commons.math.stat.regression;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealVector;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -25,14 +27,14 @@ import org.junit.Test;
public abstract class MultipleLinearRegressionAbstractTest {
- protected MultipleLinearRegression regression;
+ protected AbstractMultipleLinearRegression regression;
@Before
public void setUp(){
regression = createRegression();
}
- protected abstract MultipleLinearRegression createRegression();
+ protected abstract AbstractMultipleLinearRegression createRegression();
protected abstract int getNumberOfRegressors();
@@ -63,5 +65,62 @@ public abstract class MultipleLinearRegr
assertTrue(variance > 0.0);
}
}
+
+ /**
+ * Verifies that newSampleData methods consistently insert unitary columns
+ * in design matrix. Confirms the fix for MATH-411.
+ */
+ @Test
+ public void testNewSample() throws Exception {
+ double[] design = new double[] {
+ 1, 19, 22, 33,
+ 2, 20, 30, 40,
+ 3, 25, 35, 45,
+ 4, 27, 37, 47
+ };
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ AbstractMultipleLinearRegression regression = (AbstractMultipleLinearRegression) createRegression();
+ regression.newSampleData(design, 4, 3);
+ RealMatrix flatX = regression.X.copy();
+ RealVector flatY = regression.Y.copy();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(flatX, regression.X);
+ assertEquals(flatY, regression.Y);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleNullData() throws Exception {
+ double[] data = null;
+ createRegression().newSampleData(data, 2, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleInvalidData() throws Exception {
+ double[] data = new double[] {1, 2, 3, 4};
+ createRegression().newSampleData(data, 2, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleInsufficientData() throws Exception {
+ double[] data = new double[] {1, 2, 3, 4};
+ createRegression().newSampleData(data, 1, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testXSampleDataNull() {
+ createRegression().newXSampleData(null);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testYSampleDataNull() {
+ createRegression().newYSampleData(null);
+ }
}
Modified: commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java Wed Sep 8 01:24:51 2010
@@ -24,6 +24,7 @@ import org.apache.commons.math.linear.Ma
import org.apache.commons.math.linear.MatrixVisitorException;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.linear.RealVector;
import org.apache.commons.math.stat.StatUtils;
import org.junit.Before;
import org.junit.Test;
@@ -38,12 +39,12 @@ public class OLSMultipleLinearRegression
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
- x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
- x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
- x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
- x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
- x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
- x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
+ x[0] = new double[]{0, 0, 0, 0, 0};
+ x[1] = new double[]{2.0, 0, 0, 0, 0};
+ x[2] = new double[]{0, 3.0, 0, 0, 0};
+ x[3] = new double[]{0, 0, 4.0, 0, 0};
+ x[4] = new double[]{0, 0, 0, 5.0, 0};
+ x[5] = new double[]{0, 0, 0, 0, 6.0};
super.setUp();
}
@@ -56,24 +57,14 @@ public class OLSMultipleLinearRegression
@Override
protected int getNumberOfRegressors() {
- return x[0].length;
+ return x[0].length + 1;
}
@Override
protected int getSampleSize() {
return y.length;
}
-
- @Test(expected=IllegalArgumentException.class)
- public void cannotAddXSampleData() {
- createRegression().newSampleData(new double[]{}, null);
- }
-
- @Test(expected=IllegalArgumentException.class)
- public void cannotAddNullYSampleData() {
- createRegression().newSampleData(null, new double[][]{});
- }
-
+
@Test(expected=IllegalArgumentException.class)
public void cannotAddSampleDataWithSizeMismatch() {
double[] y = new double[]{1.0, 2.0};
@@ -248,17 +239,16 @@ public class OLSMultipleLinearRegression
44.7,46.6,16,29,50.43,
42.8,27.7,22,29,58.33
};
-
- // Transform to Y and X required by interface
- int nobs = 47;
- int nvars = 4;
+
+ final int nobs = 47;
+ final int nvars = 4;
// Estimate the model
OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
model.newSampleData(design, nobs, nvars);
// Check expected beta values from R
- double[] betaHat = model.estimateRegressionParameters();
+ final double[] betaHat = model.estimateRegressionParameters();
TestUtils.assertEquals(betaHat,
new double[]{91.05542390271397,
-0.22064551045715,
@@ -267,7 +257,7 @@ public class OLSMultipleLinearRegression
0.12441843147162}, 1E-12);
// Check expected residuals from R
- double[] residuals = model.estimateResiduals();
+ final double[] residuals = model.estimateResiduals();
TestUtils.assertEquals(residuals, new double[]{
7.1044267859730512,1.6580347433531366,
4.6944952770029644,8.4548022690166160,13.6547432343186212,
@@ -288,7 +278,7 @@ public class OLSMultipleLinearRegression
1E-12);
// Check standard errors from R
- double[] errors = model.estimateRegressionParametersStandardErrors();
+ final double[] errors = model.estimateRegressionParametersStandardErrors();
TestUtils.assertEquals(new double[] {6.94881329475087,
0.07360008972340,
0.27410957467466,
@@ -404,4 +394,36 @@ public class OLSMultipleLinearRegression
model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20);
}
+
+ /**
+ * Verifies that setting X and Y separately has the same effect as newSample(X,Y).
+ */
+ @Test
+ public void testNewSample2() throws Exception {
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
+ regression.newSampleData(y, x);
+ RealMatrix combinedX = regression.X.copy();
+ RealVector combinedY = regression.Y.copy();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(combinedX, regression.X);
+ assertEquals(combinedY, regression.Y);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleDataYNull() {
+ createRegression().newSampleData(null, new double[][] {});
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleDataXNull() {
+ createRegression().newSampleData(new double[] {}, null);
+ }
}
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/exception/util/LocalizedFormats.java Wed Sep 8 01:24:51 2010
@@ -126,6 +126,7 @@ public enum LocalizedFormats implements
INVALID_INTERVAL_INITIAL_VALUE_PARAMETERS("invalid interval, initial value parameters: lower={0}, initial={1}, upper={2}"),
INVALID_ITERATIONS_LIMITS("invalid iteration limits: min={0}, max={1}"),
INVALID_MAX_ITERATIONS("bad value for maximum iterations number: {0}"),
+ INVALID_REGRESSION_ARRAY("input data array length = {0} does not match the number of observations = {1} and the number of regressors = {2}"),
INVALID_ROUNDING_METHOD("invalid rounding method {0}, valid methods: {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})"),
ITERATOR_EXHAUSTED("iterator exhausted"),
LCM_OVERFLOW_32_BITS("overflow: lcm({0}, {1}) is 2^31"),
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java Wed Sep 8 01:24:51 2010
@@ -40,14 +40,47 @@ public abstract class AbstractMultipleLi
protected RealVector Y;
/**
- * Loads model x and y sample data from a flat array of data, overriding any previous sample.
- * Assumes that rows are concatenated with y values first in each row.
+ * <p>Loads model x and y sample data from a flat input array, overriding any previous sample.
+ * </p>
+ * <p>Assumes that rows are concatenated with y values first in each row. For example, an input
+ * <code>data</code> array containing the sequence of values (1, 2, 3, 4, 5, 6, 7, 8, 9) with
+ * <code>nobs = 3</code> and <code>nvars = 2</code> creates a regression dataset with two
+ * independent variables, as below:
+ * <pre>
+ * y x[0] x[1]
+ * --------------
+ * 1 2 3
+ * 4 5 6
+ * 7 8 9
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * <p>Throws IllegalArgumentException if any of the following preconditions fail:
+ * <ul><li><code>data</code> cannot be null</li>
+ * <li><code>data.length = nobs * (nvars + 1)</code></li>
+ * <li><code>nobs > nvars</code></li></ul>
+ * </p>
*
* @param data input data array
* @param nobs number of observations (rows)
* @param nvars number of independent variables (columns, not counting y)
+ * @throws IllegalArgumentException if the preconditions are not met
*/
public void newSampleData(double[] data, int nobs, int nvars) {
+ if (data == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (data.length != nobs * (nvars + 1)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INVALID_REGRESSION_ARRAY, data.length, nobs, nvars);
+ }
+ if (nobs <= nvars) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS);
+ }
double[] y = new double[nobs];
double[][] x = new double[nobs][nvars + 1];
int pointer = 0;
@@ -63,30 +96,82 @@ public abstract class AbstractMultipleLi
}
/**
- * Loads new y sample data, overriding any previous sample
+ * Loads new y sample data, overriding any previous data.
*
- * @param y the [n,1] array representing the y sample
+ * @param y the array representing the y sample
+ * @throws IllegalArgumentException if y is null or empty
*/
protected void newYSampleData(double[] y) {
+ if (y == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (y.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
this.Y = new ArrayRealVector(y);
}
/**
- * Loads new x sample data, overriding any previous sample
- *
- * @param x the [n,k] array representing the x sample
+ * <p>Loads new x sample data, overriding any previous data.
+ * </p>
+ * <p>The input <code>x</code> array should have one row for each sample
+ * observation, with columns corresponding to independent variables.
+ * For example, if <pre>
+ * <code> x = new double[][] {{1, 2}, {3, 4}, {5, 6}} </code></pre>
+ * then <code>newXSampleData(x) </code> results in a model with two independent
+ * variables and 3 observations:
+ * <pre>
+ * x[0] x[1]
+ * ----------
+ * 1 2
+ * 3 4
+ * 5 6
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * @param x the rectangular array representing the x sample
+ * @throws IllegalArgumentException if x is null, empty or not rectangular
*/
protected void newXSampleData(double[][] x) {
- this.X = new Array2DRowRealMatrix(x);
+ if (x == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (x.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ final int nVars = x[0].length;
+ final double[][] xAug = new double[x.length][nVars + 1];
+ for (int i = 0; i < x.length; i++) {
+ if (x[i].length != nVars) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
+ x[i].length, nVars);
+ }
+ xAug[i][0] = 1.0d;
+ System.arraycopy(x[i], 0, xAug[i], 1, nVars);
+ }
+ this.X = new Array2DRowRealMatrix(xAug, false);
}
/**
- * Validates sample data.
+ * Validates sample data. Checks that
+ * <ul><li>Neither x nor y is null or empty;</li>
+ * <li>The length (i.e. number of rows) of x equals the length of y</li>
+ * <li>x has at least one more row than it has columns (i.e. there is
+ * sufficient data to estimate regression coefficients for each of the
+ * columns in x plus an intercept).</li>
+ * </ul>
+ *
+ * @param x the [n,k] array representing the x data
+ * @param y the [n,1] array representing the y data
+ * @throws IllegalArgumentException if any of the checks fail
*
- * @param x the [n,k] array representing the x sample
- * @param y the [n,1] array representing the y sample
- * @throws IllegalArgumentException if the x and y array data are not
- * compatible for the regression
*/
protected void validateSampleData(double[][] x, double[] y) {
if ((x == null) || (y == null) || (x.length != y.length)) {
@@ -94,7 +179,12 @@ public abstract class AbstractMultipleLi
LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
(x == null) ? 0 : x.length,
(y == null) ? 0 : y.length);
- } else if ((x.length > 0) && (x[0].length > x.length)) {
+ }
+ if (x.length == 0) { // Must be no y data either
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ if (x[0].length + 1 > x.length) {
throw MathRuntimeException.createIllegalArgumentException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
x.length, x[0].length);
@@ -102,12 +192,13 @@ public abstract class AbstractMultipleLi
}
/**
- * Validates sample data.
+ * Validates that the x data and covariance matrix have the same
+ * number of rows and that the covariance matrix is square.
*
* @param x the [n,k] array representing the x sample
* @param covariance the [n,n] array representing the covariance matrix
- * @throws IllegalArgumentException if the x sample data or covariance
- * matrix are not compatible for the regression
+ * @throws IllegalArgumentException if the number of rows in x is not equal
+ * to the number of rows in covariance or covariance is not square.
*/
protected void validateCovarianceData(double[][] x, double[][] covariance) {
if (x.length != covariance.length) {
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java Wed Sep 8 01:24:51 2010
@@ -77,8 +77,7 @@ public class OLSMultipleLinearRegression
/**
* {@inheritDoc}
- *
- * Computes and caches QR decomposition of the X matrix
+ * <p>This implementation computes and caches the QR decomposition of the X matrix.</p>
*/
@Override
public void newSampleData(double[] data, int nobs, int nvars) {
@@ -132,7 +131,7 @@ public class OLSMultipleLinearRegression
}
/**
- * Returns the sum of square residuals.
+ * Returns the sum of squared residuals.
*
* @return residual sum of squares
*/
@@ -168,22 +167,20 @@ public class OLSMultipleLinearRegression
final double n = X.getRowDimension();
return 1 - (calculateResidualSumOfSquares() * (n - 1)) /
(calculateTotalSumOfSquares() * (n - X.getColumnDimension()));
- // return 1 - ((1 - calculateRSquare()) * (n - 1) / (n - X.getColumnDimension() - 1));
}
/**
- * Loads new x sample data, overriding any previous sample
- *
- * @param x the [n,k] array representing the x sample
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix once it is successfully loaded.</p>
*/
@Override
protected void newXSampleData(double[][] x) {
- this.X = new Array2DRowRealMatrix(x);
+ super.newXSampleData(x);
qr = new QRDecompositionImpl(X);
}
/**
- * Calculates regression coefficients using OLS.
+ * Calculates the regression coefficients using OLS.
*
* @return beta
*/
Modified: commons/proper/math/trunk/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties (original)
+++ commons/proper/math/trunk/src/main/resources/META-INF/localization/LocalizedFormats_fr.properties Wed Sep 8 01:24:51 2010
@@ -98,6 +98,7 @@ INVALID_BRACKETING_PARAMETERS = param\u0
INVALID_INTERVAL_INITIAL_VALUE_PARAMETERS = param\u00e8tres de l''intervalle initial invalides : borne inf = {0}, valeur initiale = {1}, borne sup = {2}
INVALID_ITERATIONS_LIMITS = limites d''it\u00e9rations invalides : min = {0}, max = {1}
INVALID_MAX_ITERATIONS = valeur invalide pour le nombre maximal d''it\u00e9rations : {0}
+INVALID_REGRESSION_ARRAY= longueur du tableau de donn\u00e9es = {0} ne correspond pas au nombre d''observations = {1} et le nombre de variables explicatives = {2}
INVALID_ROUNDING_METHOD = m\u00e9thode d''arondi {0} invalide, m\u00e9thodes valides : {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})
ITERATOR_EXHAUSTED = it\u00e9ration achev\u00e9e
LCM_OVERFLOW_32_BITS = d\u00e9passement de capacit\u00e9 : le MCM de {0} et {1} vaut 2^31
Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Wed Sep 8 01:24:51 2010
@@ -71,6 +71,19 @@ The <action> type attribute can be add,u
</action>
</release>
<release version="2.2" date="TBD" description="TBD">
+ <action dev="psteitz" type="fix" issue="MATH-411">
+ Modified multiple regression newSample methods to ensure that by default in all cases,
+ regression models are estimated with intercept terms. Prior to the fix for this issue,
+ newXSampleData(double[][]), newSampleData(double[], double[][]) and
+ newSampleData(double[], double[][], double[][]) all required columns of "1's" to be inserted
+ into the x[][] arrays to create a model with an intercept term; while newSampleData(double[], int, int)
+ created a model including an intercept term without requiring the unitary column. All methods have
+ been changed to eliminate the need for users to add unitary columns to specify regression models.
+ <!-- uncomment when MATH-409 is resolved (noIntercept option)
+ Users of OLSMultipleLinearRegression or GLSMultipleLinearRegression versions 2.0 or 2.1 should either
+ verify that their code does not use the first set of data loading methods above or set the noIntercept
+ property on estimated models to get the previous behavior. -->
+ </action>
<action dev="luc" type="fix" issue="MATH-412" due-to="Bill Rossi">
Added the dfp library providing arbitrary precision floating point computation in the spirit of
IEEE 854-1987 (not exactly as it uses base 1000 instead of base 10). In addition to finite numbers,
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java Wed Sep 8 01:24:51 2010
@@ -16,10 +16,13 @@
*/
package org.apache.commons.math.stat.regression;
+import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Test;
import org.apache.commons.math.TestUtils;
-import org.apache.commons.math.stat.StatUtils;
+import org.apache.commons.math.linear.MatrixUtils;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealVector;
public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
@@ -32,12 +35,12 @@ public class GLSMultipleLinearRegression
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
- x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
- x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
- x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
- x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
- x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
- x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
+ x[0] = new double[]{0, 0, 0, 0, 0};
+ x[1] = new double[]{2.0, 0, 0, 0, 0};
+ x[2] = new double[]{0, 3.0, 0, 0, 0};
+ x[3] = new double[]{0, 0, 4.0, 0, 0};
+ x[4] = new double[]{0, 0, 0, 5.0, 0};
+ x[5] = new double[]{0, 0, 0, 0, 6.0};
omega = new double[6][];
omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
@@ -115,7 +118,7 @@ public class GLSMultipleLinearRegression
@Override
protected int getNumberOfRegressors() {
- return x[0].length;
+ return x[0].length + 1;
}
@Override
@@ -135,4 +138,69 @@ public class GLSMultipleLinearRegression
model.newSampleData(y, x, omega);
TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
}
+
+ /**
+ * Verifies that setting X, Y and covariance separately has the same effect as newSampleData(y, x, covariance).
+ */
+ @Test
+ public void testNewSample2() throws Exception {
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ double[][] covariance = MatrixUtils.createRealIdentityMatrix(4).scalarMultiply(2).getData();
+ GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
+ regression.newSampleData(y, x, covariance);
+ RealMatrix combinedX = regression.X.copy();
+ RealVector combinedY = regression.Y.copy();
+ RealMatrix combinedCovInv = regression.getOmegaInverse();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(combinedX, regression.X);
+ assertEquals(combinedY, regression.Y);
+ assertEquals(combinedCovInv, regression.getOmegaInverse());
+ }
+
+ /**
+ * Verifies that GLS with identity covariance matrix gives the same results
+ * as OLS.
+ */
+ @Test
+ public void testGLSOLSConsistency() throws Exception {
+ // Use Longley data to test
+ double[] design = new double[] {
+ 60323,83.0,234289,2356,1590,107608,1947,
+ 61122,88.5,259426,2325,1456,108632,1948,
+ 60171,88.2,258054,3682,1616,109773,1949,
+ 61187,89.5,284599,3351,1650,110929,1950,
+ 63221,96.2,328975,2099,3099,112075,1951,
+ 63639,98.1,346999,1932,3594,113270,1952,
+ 64989,99.0,365385,1870,3547,115094,1953,
+ 63761,100.0,363112,3578,3350,116219,1954,
+ 66019,101.2,397469,2904,3048,117388,1955,
+ 67857,104.6,419180,2822,2857,118734,1956,
+ 68169,108.4,442769,2936,2798,120445,1957,
+ 66513,110.8,444546,4681,2637,121950,1958,
+ 68655,112.6,482704,3813,2552,123366,1959,
+ 69564,114.2,502601,3931,2514,125368,1960,
+ 69331,115.7,518173,4806,2572,127852,1961,
+ 70551,116.9,554894,4007,2827,130081,1962
+ };
+ RealMatrix identityCov = MatrixUtils.createRealIdentityMatrix(16);
+ GLSMultipleLinearRegression glsModel = new GLSMultipleLinearRegression();
+ OLSMultipleLinearRegression olsModel = new OLSMultipleLinearRegression();
+ glsModel.newSampleData(design, 16, 6);
+ olsModel.newSampleData(design, 16, 6);
+ glsModel.newCovarianceData(identityCov.getData());
+ double[] olsBeta = olsModel.calculateBeta().getData();
+ double[] glsBeta = glsModel.calculateBeta().getData();
+ // TODO: Should have assertRelativelyEquals(double[], double[], eps) in TestUtils
+ // Should also add RealVector and RealMatrix versions
+ for (int i = 0; i < olsBeta.length; i++) {
+ TestUtils.assertRelativelyEquals(olsBeta[i], glsBeta[i], 10E-7);
+ }
+ }
}
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MultipleLinearRegressionAbstractTest.java Wed Sep 8 01:24:51 2010
@@ -16,6 +16,8 @@
*/
package org.apache.commons.math.stat.regression;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealVector;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -25,14 +27,14 @@ import org.junit.Test;
public abstract class MultipleLinearRegressionAbstractTest {
- protected MultipleLinearRegression regression;
+ protected AbstractMultipleLinearRegression regression;
@Before
public void setUp(){
regression = createRegression();
}
- protected abstract MultipleLinearRegression createRegression();
+ protected abstract AbstractMultipleLinearRegression createRegression();
protected abstract int getNumberOfRegressors();
@@ -63,5 +65,62 @@ public abstract class MultipleLinearRegr
assertTrue(variance > 0.0);
}
}
+
+ /**
+ * Verifies that newSampleData methods consistently insert unitary columns
+ * in design matrix. Confirms the fix for MATH-411.
+ */
+ @Test
+ public void testNewSample() throws Exception {
+ double[] design = new double[] {
+ 1, 19, 22, 33,
+ 2, 20, 30, 40,
+ 3, 25, 35, 45,
+ 4, 27, 37, 47
+ };
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ AbstractMultipleLinearRegression regression = (AbstractMultipleLinearRegression) createRegression();
+ regression.newSampleData(design, 4, 3);
+ RealMatrix flatX = regression.X.copy();
+ RealVector flatY = regression.Y.copy();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(flatX, regression.X);
+ assertEquals(flatY, regression.Y);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleNullData() throws Exception {
+ double[] data = null;
+ createRegression().newSampleData(data, 2, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleInvalidData() throws Exception {
+ double[] data = new double[] {1, 2, 3, 4};
+ createRegression().newSampleData(data, 2, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleInsufficientData() throws Exception {
+ double[] data = new double[] {1, 2, 3, 4};
+ createRegression().newSampleData(data, 1, 3);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testXSampleDataNull() {
+ createRegression().newXSampleData(null);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testYSampleDataNull() {
+ createRegression().newYSampleData(null);
+ }
}
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java?rev=993574&r1=993573&r2=993574&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java Wed Sep 8 01:24:51 2010
@@ -24,6 +24,7 @@ import org.apache.commons.math.linear.Ma
import org.apache.commons.math.linear.MatrixVisitorException;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.linear.RealVector;
import org.apache.commons.math.stat.StatUtils;
import org.junit.Before;
import org.junit.Test;
@@ -38,12 +39,12 @@ public class OLSMultipleLinearRegression
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
- x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
- x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
- x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
- x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
- x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
- x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
+ x[0] = new double[]{0, 0, 0, 0, 0};
+ x[1] = new double[]{2.0, 0, 0, 0, 0};
+ x[2] = new double[]{0, 3.0, 0, 0, 0};
+ x[3] = new double[]{0, 0, 4.0, 0, 0};
+ x[4] = new double[]{0, 0, 0, 5.0, 0};
+ x[5] = new double[]{0, 0, 0, 0, 6.0};
super.setUp();
}
@@ -56,24 +57,14 @@ public class OLSMultipleLinearRegression
@Override
protected int getNumberOfRegressors() {
- return x[0].length;
+ return x[0].length + 1;
}
@Override
protected int getSampleSize() {
return y.length;
}
-
- @Test(expected=IllegalArgumentException.class)
- public void cannotAddXSampleData() {
- createRegression().newSampleData(new double[]{}, null);
- }
-
- @Test(expected=IllegalArgumentException.class)
- public void cannotAddNullYSampleData() {
- createRegression().newSampleData(null, new double[][]{});
- }
-
+
@Test(expected=IllegalArgumentException.class)
public void cannotAddSampleDataWithSizeMismatch() {
double[] y = new double[]{1.0, 2.0};
@@ -248,17 +239,16 @@ public class OLSMultipleLinearRegression
44.7,46.6,16,29,50.43,
42.8,27.7,22,29,58.33
};
-
- // Transform to Y and X required by interface
- int nobs = 47;
- int nvars = 4;
+
+ final int nobs = 47;
+ final int nvars = 4;
// Estimate the model
OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
model.newSampleData(design, nobs, nvars);
// Check expected beta values from R
- double[] betaHat = model.estimateRegressionParameters();
+ final double[] betaHat = model.estimateRegressionParameters();
TestUtils.assertEquals(betaHat,
new double[]{91.05542390271397,
-0.22064551045715,
@@ -267,7 +257,7 @@ public class OLSMultipleLinearRegression
0.12441843147162}, 1E-12);
// Check expected residuals from R
- double[] residuals = model.estimateResiduals();
+ final double[] residuals = model.estimateResiduals();
TestUtils.assertEquals(residuals, new double[]{
7.1044267859730512,1.6580347433531366,
4.6944952770029644,8.4548022690166160,13.6547432343186212,
@@ -288,7 +278,7 @@ public class OLSMultipleLinearRegression
1E-12);
// Check standard errors from R
- double[] errors = model.estimateRegressionParametersStandardErrors();
+ final double[] errors = model.estimateRegressionParametersStandardErrors();
TestUtils.assertEquals(new double[] {6.94881329475087,
0.07360008972340,
0.27410957467466,
@@ -404,4 +394,36 @@ public class OLSMultipleLinearRegression
model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20);
}
+
+ /**
+ * Verifies that setting X and Y separately has the same effect as newSampleData(y, x).
+ */
+ @Test
+ public void testNewSample2() throws Exception {
+ double[] y = new double[] {1, 2, 3, 4};
+ double[][] x = new double[][] {
+ {19, 22, 33},
+ {20, 30, 40},
+ {25, 35, 45},
+ {27, 37, 47}
+ };
+ OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
+ regression.newSampleData(y, x);
+ RealMatrix combinedX = regression.X.copy();
+ RealVector combinedY = regression.Y.copy();
+ regression.newXSampleData(x);
+ regression.newYSampleData(y);
+ assertEquals(combinedX, regression.X);
+ assertEquals(combinedY, regression.Y);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleDataYNull() {
+ createRegression().newSampleData(null, new double[][] {});
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testNewSampleDataXNull() {
+ createRegression().newSampleData(new double[] {}, null);
+ }
}
Re: svn commit: r993574 - in /commons/proper/math: branches/MATH_2_X/src/main/java/org/apache/commons/math/exception/util/
branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/regression/
branches/MATH_2_X/src/main/resources/META-INF/localization/ ...
Posted by Phil Steitz <ph...@gmail.com>.
On 9/7/10 9:24 PM, psteitz@apache.org wrote:
> Author: psteitz
> Date: Wed Sep 8 01:24:51 2010
> New Revision: 993574
>
> URL: http://svn.apache.org/viewvc?rev=993574&view=rev
> Log:
> * Modified multiple regression newSample methods to ensure that by default in all cases,
> regression models are estimated with intercept terms. Prior to the fix for this issue,
> newXSampleData(double[][]), newSampleData(double[], double[][]) and
> newSampleData(double[], double[][], double[][]) all required columns of "1's"
> to be inserted into the x[][] arrays to create a model with an intercept term;
> while newSampleData(double[], int, int) created a model including an intercept
> term without requiring the unitary column. All methods have been changed to
> eliminate the need for users to add unitary columns to specify regression models.
> * Improved javadoc
> * Improved tests
> JIRA: MATH-411
I agonized over handling the behavior change here via deprecation +
adding new methods. If the old behavior was documented, I would
have done that; but when you combine the fact that the documentation
was so poor (still needs work, but it is better now) and the
non-trivial tests all use the "correct" (intercept included) method,
I thought it was best to make the behavior consistent, document the
change in the release notes, and show how to get the no intercept
behavior once MATH-409 is fixed. If anyone feels strongly that we
should go the add new methods and deprecate route (making an
arguably already overly complicated API even more complicated), I
can revert this change and do that for the 2_x branch.
Phil
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org