You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2012/02/16 20:41:55 UTC
svn commit: r1245133 - in /commons/proper/math/trunk/src:
main/java/org/apache/commons/math3/stat/correlation/
test/java/org/apache/commons/math3/stat/correlation/
Author: tn
Date: Thu Feb 16 19:41:42 2012
New Revision: 1245133
URL: http://svn.apache.org/viewvc?rev=1245133&view=rev
Log:
Changed StorelessCovariance according to suggestions from psteitz.
JIRA: MATH-449
Modified:
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java Thu Feb 16 19:41:42 2012
@@ -30,10 +30,13 @@ import org.apache.commons.math3.exceptio
* Sandia National Laboratories. It computes the covariance for a pair of variables.
* Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
*
+ * <p>Note: This class is package private as it is only used internally in
+ * the {@link StorelessCovariance} class.</p>
+ *
* @version $Id$
* @since 3.0
*/
-public class StorelessBivariateCovariance {
+class StorelessBivariateCovariance {
/** the mean of variable x */
private double meanX;
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java Thu Feb 16 19:41:42 2012
@@ -34,121 +34,129 @@ import org.apache.commons.math3.linear.R
* Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
* Sandia National Laboratories.</p>
*
+ * <p>Note: the underlying covariance matrix is symmetric, thus only the
+ * upper triangular part of the matrix is stored and updated each increment.</p>
+ *
* @version $Id$
* @since 3.0
*/
public class StorelessCovariance extends Covariance {
- /** the two-dimensional covariance matrix */
- private StorelessBivariateCovariance[][] covMatrix;
-
- /** row dimension of the covariance matrix */
- private int rowDimension;
-
- /** column dimension of the covariance matrix */
- private int colDimension;
+ /** the square covariance matrix (upper triangular part) */
+ private StorelessBivariateCovariance[] covMatrix;
- /** flag for bias correction */
- private boolean biasCorrected;
+ /** dimension of the square covariance matrix */
+ private int dimension;
/**
- * Create a bias corrected covariance matrix with a given number of rows and columns.
+ * Create a bias corrected covariance matrix with a given dimension.
*
- * @param rows number of rows
- * @param cols number of columns
+ * @param dim the dimension of the square covariance matrix
*/
- public StorelessCovariance(final int rows, final int cols) {
- this(rows, cols, true);
+ public StorelessCovariance(final int dim) {
+ this(dim, true);
}
/**
* Create a covariance matrix with a given dimension and the
* indicated bias correction.
*
- * @param rows number of variables in the rows
- * @param cols number of variables in the columns
- * @param biasCorrection if <code>true</code> the covariance estimate is corrected
+ * @param dim the dimension of the covariance matrix
+ * @param biasCorrected if <code>true</code> the covariance estimate is corrected
* for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
* i.e. n in the denominator.
*/
- public StorelessCovariance(final int rows, final int cols,
- final boolean biasCorrection) {
- rowDimension = rows;
- colDimension = cols;
- biasCorrected = biasCorrection;
- covMatrix = new StorelessBivariateCovariance[rowDimension][colDimension];
- initializeMatrix();
+ public StorelessCovariance(final int dim, final boolean biasCorrected) {
+ dimension = dim;
+ covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
+ initializeMatrix(biasCorrected);
}
/**
* Initialize the internal one-dimensional (packed triangular) array of
* {@link StorelessBivariateCovariance} instances.
+ *
+ * @param biasCorrected if the covariance estimate shall be corrected for bias
*/
- private void initializeMatrix() {
- for(int i=0;i<rowDimension;i++){
- for(int j=0;j<colDimension;j++){
- covMatrix[i][j] = new StorelessBivariateCovariance(biasCorrected);
+ private void initializeMatrix(final boolean biasCorrected) {
+ for(int i = 0; i < dimension; i++){
+ for(int j = 0; j < dimension; j++){
+ setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
}
}
}
/**
- * Get the covariance for an individual element of the covariance matrix.
+ * Returns the index (i, j) translated into the one-dimensional
+ * array used to store the upper triangular part of the symmetric
+ * covariance matrix.
*
- * @param xIndex row index in the covariance matrix
- * @param yIndex column index in the covariance matrix
- * @return the covariance of the given element
+ * @param i the row index
+ * @param j the column index
+ * @return the corresponding index in the matrix array
*/
- public StorelessBivariateCovariance getCovariance(final int xIndex,
- final int yIndex) {
- return covMatrix[xIndex][yIndex];
+ private int indexOf(final int i, final int j) {
+ return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
}
/**
- * Set the covariance for an individual element of the covariance matrix.
- *
- * @param xIndex row index in the covariance matrix
- * @param yIndex column index in the covariance matrix
- * @param cov the covariance to be set
+ * Gets the element at index (i, j) from the covariance matrix
+ * @param i the row index
+ * @param j the column index
+ * @return the {@link StorelessBivariateCovariance} element at the given index
*/
- public void setCovariance(final int xIndex, final int yIndex,
- final StorelessBivariateCovariance cov) {
- covMatrix[xIndex][yIndex] = cov;
+ private StorelessBivariateCovariance getElement(final int i, final int j) {
+ return covMatrix[indexOf(i, j)];
}
/**
- * Increment one individual element of the covariance matrix.
- *
- * <p>The element is specified by the xIndex and yIndex and incremented with the
- * corresponding values of x and y.</p>
+ * Sets the covariance element at index (i, j) in the covariance matrix
+ * @param i the row index
+ * @param j the column index
+ * @param cov the {@link StorelessBivariateCovariance} element to be set
+ */
+ private void setElement(final int i, final int j,
+ final StorelessBivariateCovariance cov) {
+ covMatrix[indexOf(i, j)] = cov;
+ }
+
+ /**
+ * Get the covariance for an individual element of the covariance matrix.
*
* @param xIndex row index in the covariance matrix
* @param yIndex column index in the covariance matrix
- * @param x value of x
- * @param y value of y
+ * @return the covariance of the given element
+ * @throws NumberIsTooSmallException if the number of observations
+ * in the cell is < 2
*/
- public void incrementCovariance(final int xIndex, final int yIndex,
- final double x, final double y) {
- covMatrix[xIndex][yIndex].increment(x, y);
+ public double getCovariance(final int xIndex,
+ final int yIndex)
+ throws NumberIsTooSmallException {
+
+ return getElement(xIndex, yIndex).getResult();
+
}
/**
* Increment the covariance matrix with one row of data.
*
- * @param rowData array representing one row of data.
+ * @param data array representing one row of data.
* @throws DimensionMismatchException if the length of <code>data</code>
* does not match the dimension of the covariance matrix
*/
- public void incrementRow(final double[] rowData)
+ public void increment(final double[] data)
throws DimensionMismatchException {
- int length = rowData.length;
- if (length != colDimension) {
- throw new DimensionMismatchException(length, colDimension);
+ int length = data.length;
+ if (length != dimension) {
+ throw new DimensionMismatchException(length, dimension);
}
+
+ // only update the upper triangular part of the covariance matrix
+ // as only these parts are actually stored
for (int i = 0; i < length; i++){
- for (int j = 0; j < length; j++){
- covMatrix[i][j].increment(rowData[i], rowData[j]);
+ for (int j = i; j < length; j++){
+ getElement(i, j).increment(data[i], data[j]);
}
}
@@ -171,10 +179,10 @@ public class StorelessCovariance extends
* for a cell is < 2
*/
public double[][] getData() throws NumberIsTooSmallException {
- final double[][] data = new double[rowDimension][rowDimension];
- for (int i = 0; i < rowDimension; i++) {
- for (int j = 0; j < colDimension; j++) {
- data[i][j] = covMatrix[i][j].getResult();
+ final double[][] data = new double[dimension][dimension];
+ for (int i = 0; i < dimension; i++) {
+ for (int j = 0; j < dimension; j++) {
+ data[i][j] = getElement(i, j).getResult();
}
}
return data;
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java Thu Feb 16 19:41:42 2012
@@ -19,9 +19,9 @@ package org.apache.commons.math3.stat.co
import org.apache.commons.math3.TestUtils;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
+import org.junit.Assert;
import org.junit.Test;
-
public class StorelessCovarianceTest {
protected final double[] longleyData = new double[] {
@@ -163,9 +163,9 @@ public class StorelessCovarianceTest {
2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
};
- StorelessCovariance covMatrix = new StorelessCovariance(7, 7);
+ StorelessCovariance covMatrix = new StorelessCovariance(7);
for(int i=0;i<matrix.getRowDimension();i++){
- covMatrix.incrementRow(matrix.getRow(i));
+ covMatrix.increment(matrix.getRow(i));
}
RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
@@ -174,8 +174,6 @@ public class StorelessCovarianceTest {
}
-
-
/**
* Test R Swiss fertility dataset against R.
* Data Source: R datasets package
@@ -192,9 +190,9 @@ public class StorelessCovarianceTest {
241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
};
- StorelessCovariance covMatrix = new StorelessCovariance(5, 5);
+ StorelessCovariance covMatrix = new StorelessCovariance(5);
for(int i=0;i<matrix.getRowDimension();i++){
- covMatrix.incrementRow(matrix.getRow(i));
+ covMatrix.increment(matrix.getRow(i));
}
RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
@@ -203,93 +201,26 @@ public class StorelessCovarianceTest {
}
/**
- * Test Longley dataset against R.
- * Data Source: J. Longley (1967) "An Appraisal of Least Squares
- * Programs for the Electronic Computer from the Point of View of the User"
- * Journal of the American Statistical Association, vol. 62. September,
- * pp. 819-841.
- *
- * Data are from NIST:
- * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
+ * Test symmetry of the covariance matrix
*/
@Test
- public void testLonglyByEntry() {
- RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
-
- double[] rData = new double[] {
- 12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
- 1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
- 36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
- 6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
- 343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
- 56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
- 1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
- 873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
- 1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
- -115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
- 23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
- 4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
- 16240.93333333333, 5.092333333333334e+01, 470977.900000000,
- 2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
- };
+ public void testSymmetry() {
+ RealMatrix matrix = createRealMatrix(swissData, 47, 5);
- int row = matrix.getRowDimension();
- int col = matrix.getColumnDimension();
- double x = 0.0;
- double y = 0.0;
- StorelessCovariance covMatrix = new StorelessCovariance(7, 7);
- for(int i=0;i<row;i++){
- for(int j=0;j<col;j++){
- x = matrix.getEntry(i, j);
- for(int k=0;k<col;k++){
- y = matrix.getEntry(i, k);
- covMatrix.incrementCovariance(j, k, x, y);
- }
- }
+ final int dimension = 5;
+ StorelessCovariance storelessCov = new StorelessCovariance(dimension);
+ for(int i=0;i<matrix.getRowDimension();i++){
+ storelessCov.increment(matrix.getRow(i));
}
- RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
-
- TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 7, 7), covarianceMatrix, 10E-7);
-
- }
-
- /**
- * Test R Swiss fertility dataset against R.
- * Data Source: R datasets package
- */
- @Test
- public void testSwissFertilityByEntry() {
- RealMatrix matrix = createRealMatrix(swissData, 47, 5);
-
- double[] rData = new double[] {
- 156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
- 100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
- -64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
- -79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
- 241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
- };
-
- int row = matrix.getRowDimension();
- int col = matrix.getColumnDimension();
- double x = 0.0;
- double y = 0.0;
- StorelessCovariance covMatrix = new StorelessCovariance(5, 5);
- for(int i=0;i<row;i++){
- for(int j=0;j<col;j++){
- x = matrix.getEntry(i, j);
- for(int k=0;k<col;k++){
- y = matrix.getEntry(i, k);
- covMatrix.incrementCovariance(j, k, x, y);
- }
+ double[][] covMatrix = storelessCov.getData();
+ for (int i = 0; i < dimension; i++) {
+ for (int j = i; j < dimension; j++) {
+ Assert.assertEquals(covMatrix[i][j], covMatrix[j][i], 10e-9);
}
}
-
- RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
-
- TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 5, 5), covarianceMatrix, 10E-13);
}
-
+
protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
double[][] matrixData = new double[nRows][nCols];
int ptr = 0;