You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2012/02/16 20:41:55 UTC

svn commit: r1245133 - in /commons/proper/math/trunk/src: main/java/org/apache/commons/math3/stat/correlation/ test/java/org/apache/commons/math3/stat/correlation/

Author: tn
Date: Thu Feb 16 19:41:42 2012
New Revision: 1245133

URL: http://svn.apache.org/viewvc?rev=1245133&view=rev
Log:
Changed StorelessCovariance according to suggestions from psteitz.
JIRA: MATH-449

Modified:
    commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
    commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
    commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java

Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java Thu Feb 16 19:41:42 2012
@@ -30,10 +30,13 @@ import org.apache.commons.math3.exceptio
  * Sandia National Laboratories. It computes the covariance for a pair of variables.
  * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
  *
+ * <p>Note: This class is package private as it is only used internally in
+ * the {@link StorelessCovariance} class.</p>
+ *
  * @version $Id$
  * @since 3.0
  */
-public class StorelessBivariateCovariance {
+class StorelessBivariateCovariance {
 
     /** the mean of variable x */
     private double meanX;

Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java Thu Feb 16 19:41:42 2012
@@ -34,121 +34,129 @@ import org.apache.commons.math3.linear.R
  * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
  * Sandia National Laboratories.</p>
  *
+ * <p>Note: the underlying covariance matrix is symmetric, thus only the
+ * upper triangular part of the matrix is stored and updated on each increment.</p>
+ *
  * @version $Id$
  * @since 3.0
  */
 public class StorelessCovariance extends Covariance {
 
-    /** the two-dimensional covariance matrix */
-    private StorelessBivariateCovariance[][] covMatrix;
-
-    /** row dimension of the covariance matrix */
-    private int rowDimension;
-
-    /** column dimension of the covariance matrix */
-    private int colDimension;
+    /** the square covariance matrix (upper triangular part) */
+    private StorelessBivariateCovariance[] covMatrix;
 
-    /** flag for bias correction */
-    private boolean biasCorrected;
+    /** dimension of the square covariance matrix */
+    private int dimension;
 
     /**
-     * Create a bias corrected covariance matrix with a given number of rows and columns.
+     * Create a bias corrected covariance matrix with a given dimension.
      *
-     * @param rows number of rows
-     * @param cols number of columns
+     * @param dim the dimension of the square covariance matrix
      */
-    public StorelessCovariance(final int rows, final int cols) {
-        this(rows, cols, true);
+    public StorelessCovariance(final int dim) {
+        this(dim, true);
     }
 
     /**
      * Create a covariance matrix with a given number of rows and columns and the
      * indicated bias correction.
      *
-     * @param rows number of variables in the rows
-     * @param cols number of variables in the columns
-     * @param biasCorrection if <code>true</code> the covariance estimate is corrected
+     * @param dim the dimension of the covariance matrix
+     * @param biasCorrected if <code>true</code> the covariance estimate is corrected
      * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
      * i.e. n in the denominator.
      */
-    public StorelessCovariance(final int rows, final int cols,
-                               final boolean biasCorrection) {
-        rowDimension = rows;
-        colDimension = cols;
-        biasCorrected = biasCorrection;
-        covMatrix = new StorelessBivariateCovariance[rowDimension][colDimension];
-        initializeMatrix();
+    public StorelessCovariance(final int dim, final boolean biasCorrected) {
+        dimension = dim;
+        covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
+        initializeMatrix(biasCorrected);
     }
 
     /**
      * Initialize the internal two-dimensional array of
      * {@link StorelessBivariateCovariance} instances.
+     *
+     * @param biasCorrected if the covariance estimate shall be corrected for bias
      */
-    private void initializeMatrix() {
-        for(int i=0;i<rowDimension;i++){
-            for(int j=0;j<colDimension;j++){
-                covMatrix[i][j] = new StorelessBivariateCovariance(biasCorrected);
+    private void initializeMatrix(final boolean biasCorrected) {
+        for(int i = 0; i < dimension; i++){
+            for(int j = 0; j < dimension; j++){
+                setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
             }
         }
     }
 
     /**
-     * Get the covariance for an individual element of the covariance matrix.
+     * Returns the index (i, j) translated into the one-dimensional
+     * array used to store the upper triangular part of the symmetric
+     * covariance matrix.
      *
-     * @param xIndex row index in the covariance matrix
-     * @param yIndex column index in the covariance matrix
-     * @return the covariance of the given element
+     * @param i the row index
+     * @param j the column index
+     * @return the corresponding index in the matrix array
      */
-    public StorelessBivariateCovariance getCovariance(final int xIndex,
-                                                      final int yIndex) {
-        return covMatrix[xIndex][yIndex];
+    private int indexOf(final int i, final int j) {
+        return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
     }
 
     /**
-     * Set the covariance for an individual element of the covariance matrix.
-     *
-     * @param xIndex row index in the covariance matrix
-     * @param yIndex column index in the covariance matrix
-     * @param cov the covariance to be set
+     * Gets the element at index (i, j) from the covariance matrix.
+     * @param i the row index
+     * @param j the column index
+     * @return the {@link StorelessBivariateCovariance} element at the given index
      */
-    public void setCovariance(final int xIndex, final int yIndex,
-                              final StorelessBivariateCovariance cov) {
-        covMatrix[xIndex][yIndex] = cov;
+    private StorelessBivariateCovariance getElement(final int i, final int j) {
+        return covMatrix[indexOf(i, j)];
     }
 
     /**
-     * Increment one individual element of the covariance matrix.
-     *
-     * <p>The element is specified by the xIndex and yIndex and incremented with the
-     * corresponding values of x and y.</p>
+     * Sets the covariance element at index (i, j) in the covariance matrix.
+     * @param i the row index
+     * @param j the column index
+     * @param cov the {@link StorelessBivariateCovariance} element to be set
+     */
+    private void setElement(final int i, final int j,
+                            final StorelessBivariateCovariance cov) {
+        covMatrix[indexOf(i, j)] = cov;
+    }
+
+    /**
+     * Get the covariance for an individual element of the covariance matrix.
      *
      * @param xIndex row index in the covariance matrix
      * @param yIndex column index in the covariance matrix
-     * @param x value of x
-     * @param y value of y
+     * @return the covariance of the given element
+     * @throws NumberIsTooSmallException if the number of observations
+     * in the cell is &lt; 2
      */
-    public void incrementCovariance(final int xIndex, final int yIndex,
-                                    final double x, final double y) {
-        covMatrix[xIndex][yIndex].increment(x, y);
+    public double getCovariance(final int xIndex,
+                                final int yIndex)
+        throws NumberIsTooSmallException {
+
+        return getElement(xIndex, yIndex).getResult();
+
     }
 
     /**
      * Increment the covariance matrix with one row of data.
      *
-     * @param rowData array representing one row of data.
+     * @param data array representing one row of data.
      * @throws DimensionMismatchException if the length of <code>rowData</code>
      * does not match with the covariance matrix
      */
-    public void incrementRow(final double[] rowData)
+    public void increment(final double[] data)
         throws DimensionMismatchException {
 
-        int length = rowData.length;
-        if (length != colDimension) {
-            throw new DimensionMismatchException(length, colDimension);
+        int length = data.length;
+        if (length != dimension) {
+            throw new DimensionMismatchException(length, dimension);
         }
+
+        // only update the upper triangular part of the covariance matrix
+        // as only these parts are actually stored
         for (int i = 0; i < length; i++){
-            for (int j = 0; j < length; j++){
-                covMatrix[i][j].increment(rowData[i], rowData[j]);
+            for (int j = i; j < length; j++){
+                getElement(i, j).increment(data[i], data[j]);
             }
         }
 
@@ -171,10 +179,10 @@ public class StorelessCovariance extends
      * for a cell is &lt; 2
      */
     public double[][] getData() throws NumberIsTooSmallException {
-        final double[][] data = new double[rowDimension][rowDimension];
-        for (int i = 0; i < rowDimension; i++) {
-            for (int j = 0; j < colDimension; j++) {
-                data[i][j] = covMatrix[i][j].getResult();
+        final double[][] data = new double[dimension][dimension];
+        for (int i = 0; i < dimension; i++) {
+            for (int j = 0; j < dimension; j++) {
+                data[i][j] = getElement(i, j).getResult();
             }
         }
         return data;

Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java?rev=1245133&r1=1245132&r2=1245133&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/StorelessCovarianceTest.java Thu Feb 16 19:41:42 2012
@@ -19,9 +19,9 @@ package org.apache.commons.math3.stat.co
 import org.apache.commons.math3.TestUtils;
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.junit.Assert;
 import org.junit.Test;
 
-
 public class StorelessCovarianceTest {
 
     protected final double[] longleyData = new double[] {
@@ -163,9 +163,9 @@ public class StorelessCovarianceTest {
          2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
         };
 
-        StorelessCovariance covMatrix = new StorelessCovariance(7, 7);
+        StorelessCovariance covMatrix = new StorelessCovariance(7);
         for(int i=0;i<matrix.getRowDimension();i++){
-            covMatrix.incrementRow(matrix.getRow(i));
+            covMatrix.increment(matrix.getRow(i));
         }
 
         RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
@@ -174,8 +174,6 @@ public class StorelessCovarianceTest {
 
     }
 
-
-
     /**
      * Test R Swiss fertility dataset against R.
      * Data Source: R datasets package
@@ -192,9 +190,9 @@ public class StorelessCovarianceTest {
             241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
          };
 
-        StorelessCovariance covMatrix = new StorelessCovariance(5, 5);
+        StorelessCovariance covMatrix = new StorelessCovariance(5);
         for(int i=0;i<matrix.getRowDimension();i++){
-            covMatrix.incrementRow(matrix.getRow(i));
+            covMatrix.increment(matrix.getRow(i));
         }
 
         RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
@@ -203,93 +201,26 @@ public class StorelessCovarianceTest {
     }
 
     /**
-     * Test Longley dataset against R.
-     * Data Source: J. Longley (1967) "An Appraisal of Least Squares
-     * Programs for the Electronic Computer from the Point of View of the User"
-     * Journal of the American Statistical Association, vol. 62. September,
-     * pp. 819-841.
-     *
-     * Data are from NIST:
-     * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
+     * Test symmetry of the covariance matrix
      */
     @Test
-    public void testLonglyByEntry() {
-        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
-
-        double[] rData = new double[] {
-         12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
-         1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
-         36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
-         6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
-         343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
-         56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
-         1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
-         873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
-         1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
-         -115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
-         23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
-         4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
-         16240.93333333333, 5.092333333333334e+01, 470977.900000000,
-         2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
-        };
+    public void testSymmetry() {
+        RealMatrix matrix = createRealMatrix(swissData, 47, 5);
 
-        int row = matrix.getRowDimension();
-        int col = matrix.getColumnDimension();
-        double x = 0.0;
-        double y = 0.0;
-        StorelessCovariance covMatrix = new StorelessCovariance(7, 7);
-        for(int i=0;i<row;i++){
-            for(int j=0;j<col;j++){
-                x = matrix.getEntry(i, j);
-                for(int k=0;k<col;k++){
-                    y = matrix.getEntry(i, k);
-                    covMatrix.incrementCovariance(j, k, x, y);
-                }
-            }
+        final int dimension = 5;
+        StorelessCovariance storelessCov = new StorelessCovariance(dimension);
+        for(int i=0;i<matrix.getRowDimension();i++){
+            storelessCov.increment(matrix.getRow(i));
         }
 
-        RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
-
-        TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 7, 7), covarianceMatrix, 10E-7);
-
-    }
-
-    /**
-     * Test R Swiss fertility dataset against R.
-     * Data Source: R datasets package
-     */
-    @Test
-    public void testSwissFertilityByEntry() {
-         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
-
-         double[] rData = new double[] {
-           156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
-           100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
-           -64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
-           -79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
-            241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
-         };
-
-        int row = matrix.getRowDimension();
-        int col = matrix.getColumnDimension();
-        double x = 0.0;
-        double y = 0.0;
-        StorelessCovariance covMatrix = new StorelessCovariance(5, 5);
-        for(int i=0;i<row;i++){
-            for(int j=0;j<col;j++){
-                x = matrix.getEntry(i, j);
-                for(int k=0;k<col;k++){
-                    y = matrix.getEntry(i, k);
-                    covMatrix.incrementCovariance(j, k, x, y);
-                }
+        double[][] covMatrix = storelessCov.getData();
+        for (int i = 0; i < dimension; i++) {
+            for (int j = i; j < dimension; j++) {
+                Assert.assertEquals(covMatrix[i][j], covMatrix[j][i], 10e-9);
             }
         }
-
-        RealMatrix covarianceMatrix = covMatrix.getCovarianceMatrix();
-
-        TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 5, 5), covarianceMatrix, 10E-13);
     }
-
+    
     protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
         double[][] matrixData = new double[nRows][nCols];
         int ptr = 0;