You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2013/11/09 22:32:06 UTC

svn commit: r1540395 - /commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java

Author: psteitz
Date: Sat Nov  9 21:32:06 2013
New Revision: 1540395

URL: http://svn.apache.org/r1540395
Log:
Clarified contracts re NaNs, IAEs and when constructor arguments are necessary.

Modified:
    commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java

Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java?rev=1540395&r1=1540394&r2=1540395&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java Sat Nov  9 21:32:06 2013
@@ -34,9 +34,17 @@ import org.apache.commons.math3.util.Fas
  * <code>double[][]</code> arguments generate correlation matrices.  The
  * columns of the input matrices are assumed to represent variable values.
  * Correlations are given by the formula</p>
- * <code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
+ *
+ * <p><code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
  * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
- * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
+ * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.</p>
+ *
+ * <p>To compute the correlation coefficient for a single pair of arrays, use {@link #PearsonsCorrelation()}
+ * to construct an instance with no data and then {@link #correlation(double[], double[])}.
+ * Correlation matrices can also be computed directly from an instance with no data using
+ * {@link #computeCorrelationMatrix(double[][])}. To use {@link #getCorrelationMatrix()},
+ * {@link #getCorrelationPValues()}, or {@link #getCorrelationStandardErrors()}, however, one of the
+ * constructors supplying data or a covariance matrix must be used to create the instance.</p>
  *
  * @version $Id$
  * @since 2.0
@@ -50,7 +58,7 @@ public class PearsonsCorrelation {
     private final int nObs;
 
     /**
-     * Create a PearsonsCorrelation instance without data
+     * Create a PearsonsCorrelation instance without data.
      */
     public PearsonsCorrelation() {
         super();
@@ -62,9 +70,14 @@ public class PearsonsCorrelation {
      * Create a PearsonsCorrelation from a rectangular array
      * whose columns represent values of variables to be correlated.
      *
+     * <p>Throws MathIllegalArgumentException if the input array does not have at least
+     * two columns and two rows.  Pairwise correlations are set to NaN if one
+     * of the correlates has zero variance.</p>
+     *
      * @param data rectangular array with columns representing variables
-     * @throws IllegalArgumentException if the input data array is not
+     * @throws MathIllegalArgumentException if the input data array is not
      * rectangular with at least two rows and two columns.
+     * @see #correlation(double[], double[])
      */
     public PearsonsCorrelation(double[][] data) {
         this(new BlockRealMatrix(data));
@@ -74,10 +87,15 @@ public class PearsonsCorrelation {
      * Create a PearsonsCorrelation from a RealMatrix whose columns
      * represent variables to be correlated.
      *
+     * <p>Throws MathIllegalArgumentException if the matrix does not have at least
+     * two columns and two rows.  Pairwise correlations are set to NaN if one
+     * of the correlates has zero variance.</p>
+     *
      * @param matrix matrix with columns representing variables to correlate
+     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+     * @see #correlation(double[], double[])
      */
     public PearsonsCorrelation(RealMatrix matrix) {
-        checkSufficientData(matrix);
         nObs = matrix.getRowDimension();
         correlationMatrix = computeCorrelationMatrix(matrix);
     }
@@ -100,7 +118,7 @@ public class PearsonsCorrelation {
     }
 
     /**
-     * Create a PearsonsCorrelation from a covariance matrix.  The correlation
+     * Create a PearsonsCorrelation from a covariance matrix. The correlation
      * matrix is computed by scaling the covariance matrix.
      *
      * @param covarianceMatrix covariance matrix
@@ -110,11 +128,14 @@ public class PearsonsCorrelation {
     public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
         nObs = numberOfObservations;
         correlationMatrix = covarianceToCorrelation(covarianceMatrix);
-
     }
 
     /**
-     * Returns the correlation matrix
+     * Returns the correlation matrix.
+     *
+     * <p>This method will return {@code null} if the no-argument constructor was used
+     * to create this instance, even if {@link #computeCorrelationMatrix(double[][])}
+     * has been called before this method is invoked.</p>
      *
      * @return correlation matrix
      */
@@ -127,12 +148,17 @@ public class PearsonsCorrelation {
      * in the correlation matrix.<br/>
      * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
      * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
+     *
      * <p>The formula used to compute the standard error is <br/>
      * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
      * where <code>r</code> is the estimated correlation coefficient and
      * <code>n</code> is the number of observations in the source dataset.</p>
      *
+     * <p>To use this method, one of the constructors that supply data or a
+     * covariance matrix must have been used to create this instance.</p>
+     *
      * @return matrix of correlation standard errors
+     * @throws NullPointerException if this instance was created with no data
      */
     public RealMatrix getCorrelationStandardErrors() {
         int nVars = correlationMatrix.getColumnDimension();
@@ -149,16 +175,22 @@ public class PearsonsCorrelation {
     /**
      * Returns a matrix of p-values associated with the (two-sided) null
      * hypothesis that the corresponding correlation coefficient is zero.
+     *
      * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
      * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
      * a value with absolute value greater than or equal to <br>
      * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
+     *
      * <p>The values in the matrix are sometimes referred to as the
      * <i>significance</i> of the corresponding correlation coefficients.</p>
      *
+     * <p>To use this method, one of the constructors that supply data or a
+     * covariance matrix must have been used to create this instance.</p>
+     *
      * @return matrix of p-values
      * @throws org.apache.commons.math3.exception.MaxCountExceededException
      * if an error occurs estimating probabilities
+     * @throws NullPointerException if this instance was created with no data
      */
     public RealMatrix getCorrelationPValues() {
         TDistribution tDistribution = new TDistribution(nObs - 2);
@@ -181,12 +213,19 @@ public class PearsonsCorrelation {
 
     /**
      * Computes the correlation matrix for the columns of the
-     * input matrix.
+     * input matrix, using {@link #correlation(double[], double[])}.
+     *
+     * <p>Throws MathIllegalArgumentException if the matrix does not have at least
+     * two columns and two rows.  Pairwise correlations are set to NaN if one
+     * of the correlates has zero variance.</p>
      *
      * @param matrix matrix with columns representing variables to correlate
      * @return correlation matrix
+     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+     * @see #correlation(double[], double[])
      */
     public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+        checkSufficientData(matrix);
         int nVars = matrix.getColumnDimension();
         RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
         for (int i = 0; i < nVars; i++) {
@@ -202,21 +241,29 @@ public class PearsonsCorrelation {
 
     /**
      * Computes the correlation matrix for the columns of the
-     * input rectangular array.  The colums of the array represent values
+     * input rectangular array.  The columns of the array represent values
      * of variables to be correlated.
      *
+     * <p>Throws MathIllegalArgumentException if the array does not have at least
+     * two columns and two rows or if the array is not rectangular. Pairwise
+     * correlations are set to NaN if one of the correlates has zero variance.</p>
+     *
      * @param data matrix with columns representing variables to correlate
      * @return correlation matrix
+     * @throws MathIllegalArgumentException if the array does not contain sufficient data
+     * @see #correlation(double[], double[])
      */
     public RealMatrix computeCorrelationMatrix(double[][] data) {
        return computeCorrelationMatrix(new BlockRealMatrix(data));
     }
 
     /**
-     * Computes the Pearson's product-moment correlation coefficient between the two arrays.
+     * Computes the Pearson's product-moment correlation coefficient between two arrays.
      *
-     * </p>Throws IllegalArgumentException if the arrays do not have the same length
-     * or their common length is less than 2</p>
+     * <p>Throws MathIllegalArgumentException if the arrays do not have the same length
+     * or their common length is less than 2.  Returns {@code NaN} if either of the arrays
+     * has zero variance (i.e., if one of the arrays does not contain at least two distinct
+     * values).</p>
      *
      * @param xArray first data array
      * @param yArray second data array
@@ -267,8 +314,8 @@ public class PearsonsCorrelation {
     }
 
     /**
-     * Throws IllegalArgumentException of the matrix does not have at least
-     * two columns and two rows
+     * Throws MathIllegalArgumentException if the matrix does not have at least
+     * two columns and two rows.
      *
      * @param matrix matrix to check for sufficiency
      * @throws MathIllegalArgumentException if there is insufficient data