You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2013/03/27 20:44:22 UTC

svn commit: r1461822 - in /commons/proper/math/trunk/src: changes/changes.xml main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java

Author: tn
Date: Wed Mar 27 19:44:22 2013
New Revision: 1461822

URL: http://svn.apache.org/r1461822
Log:
[MATH-891] SpearmansCorrelation now works correctly when provided with a NaturalRanking with NaNStrategy.REMOVED.

Modified:
    commons/proper/math/trunk/src/changes/changes.xml
    commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
    commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java

Modified: commons/proper/math/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/changes/changes.xml?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/changes/changes.xml (original)
+++ commons/proper/math/trunk/src/changes/changes.xml Wed Mar 27 19:44:22 2013
@@ -55,6 +55,12 @@ This is a minor release: It combines bug
   Changes to existing features were made in a backwards-compatible
   way such as to allow drop-in replacement of the v3.1[.1] JAR file.
 ">
+      <action dev="tn" type="fix" issue="MATH-891">
+        "SpearmansCorrelation" now works correctly when the provided
+        "NaturalRanking" uses the "NaNStrategy.REMOVED" strategy and the input
+        data contains NaN values. From version 4.0 onwards this strategy will
+        no longer be supported.
+      </action>
       <action dev="erans" type="update" issue="MATH-956">
         Replaced hard-coded numbers in "LevenbergMarquardtOptimizer".
       </action>

Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java Wed Mar 27 19:44:22 2013
@@ -17,27 +17,32 @@
 
 package org.apache.commons.math3.stat.correlation;
 
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.commons.math3.exception.DimensionMismatchException;
 import org.apache.commons.math3.exception.MathIllegalArgumentException;
 import org.apache.commons.math3.exception.util.LocalizedFormats;
 import org.apache.commons.math3.linear.BlockRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
 import org.apache.commons.math3.stat.ranking.NaturalRanking;
 import org.apache.commons.math3.stat.ranking.RankingAlgorithm;
 
 /**
- * <p>Spearman's rank correlation. This implementation performs a rank
+ * Spearman's rank correlation. This implementation performs a rank
  * transformation on the input data and then computes {@link PearsonsCorrelation}
- * on the ranked data.</p>
- *
- * <p>By default, ranks are computed using {@link NaturalRanking} with default
+ * on the ranked data.
+ * <p>
+ * By default, ranks are computed using {@link NaturalRanking} with default
  * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
- * The ranking algorithm can be set using a constructor argument.</p>
+ * The ranking algorithm can be set using a constructor argument.
  *
  * @since 2.0
  * @version $Id$
  */
-
 public class SpearmansCorrelation {
 
     /** Input data */
@@ -58,6 +63,9 @@ public class SpearmansCorrelation {
 
     /**
      * Create a SpearmansCorrelation with the given ranking algorithm.
+     * <p>
+     * From version 4.0 onwards this constructor will throw an exception
+     * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
      *
      * @param rankingAlgorithm ranking algorithm
      * @since 3.1
@@ -81,15 +89,17 @@ public class SpearmansCorrelation {
     /**
      * Create a SpearmansCorrelation with the given input data matrix
      * and ranking algorithm.
+     * <p>
+     * From version 4.0 onwards this constructor will throw an exception
+     * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
      *
      * @param dataMatrix matrix of data with columns representing
      * variables to correlate
      * @param rankingAlgorithm ranking algorithm
      */
     public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
-        this.data = dataMatrix.copy();
         this.rankingAlgorithm = rankingAlgorithm;
-        rankTransform(data);
+        this.data = rankTransform(dataMatrix);
         rankCorrelation = new PearsonsCorrelation(data);
     }
 
@@ -125,9 +135,8 @@ public class SpearmansCorrelation {
      * @param matrix matrix with columns representing variables to correlate
      * @return correlation matrix
      */
-    public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
-        RealMatrix matrixCopy = matrix.copy();
-        rankTransform(matrixCopy);
+    public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
+        final RealMatrix matrixCopy = rankTransform(matrix);
         return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
     }
 
@@ -139,7 +148,7 @@ public class SpearmansCorrelation {
      * @param matrix matrix with columns representing variables to correlate
      * @return correlation matrix
      */
-    public RealMatrix computeCorrelationMatrix(double[][] matrix) {
+    public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
        return computeCorrelationMatrix(new BlockRealMatrix(matrix));
     }
 
@@ -159,20 +168,93 @@ public class SpearmansCorrelation {
             throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
                                                    xArray.length, 2);
         } else {
-            return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
-                    rankingAlgorithm.rank(yArray));
+            double[] x = xArray;
+            double[] y = yArray;
+            if (rankingAlgorithm instanceof NaturalRanking &&
+                NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
+                final Set<Integer> nanPositions = new HashSet<Integer>();
+
+                nanPositions.addAll(getNaNPositions(xArray));
+                nanPositions.addAll(getNaNPositions(yArray));
+
+                x = removeValues(xArray, nanPositions);
+                y = removeValues(yArray, nanPositions);
+            }
+            return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(x), rankingAlgorithm.rank(y));
         }
     }
 
     /**
      * Applies rank transform to each of the columns of <code>matrix</code>
-     * using the current <code>rankingAlgorithm</code>
+     * using the current <code>rankingAlgorithm</code>.
      *
      * @param matrix matrix to transform
+     * @return a rank-transformed matrix
+     */
+    private RealMatrix rankTransform(final RealMatrix matrix) {
+        RealMatrix transformed = null;
+
+        if (rankingAlgorithm instanceof NaturalRanking &&
+                ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) {
+            final Set<Integer> nanPositions = new HashSet<Integer>();
+            for (int i = 0; i < matrix.getColumnDimension(); i++) {
+                nanPositions.addAll(getNaNPositions(matrix.getColumn(i)));
+            }
+
+            // if we have found NaN values, we have to update the matrix size
+            if (!nanPositions.isEmpty()) {
+                transformed = new BlockRealMatrix(matrix.getRowDimension() - nanPositions.size(),
+                                                  matrix.getColumnDimension());
+                for (int i = 0; i < transformed.getColumnDimension(); i++) {
+                    transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions));
+                }
+            }
+        }
+
+        if (transformed == null) {
+            transformed = matrix.copy();
+        }
+
+        for (int i = 0; i < transformed.getColumnDimension(); i++) {
+            transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
+        }
+
+        return transformed;
+    }
+
+    /**
+     * Returns a list containing the indices of NaN values in the input array.
+     *
+     * @param input the input array
+     * @return a list of NaN positions in the input array
      */
-    private void rankTransform(RealMatrix matrix) {
-        for (int i = 0; i < matrix.getColumnDimension(); i++) {
-            matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
+    private List<Integer> getNaNPositions(final double[] input) {
+        final List<Integer> positions = new ArrayList<Integer>();
+        for (int i = 0; i < input.length; i++) {
+            if (Double.isNaN(input[i])) {
+                positions.add(i);
+            }
+        }
+        return positions;
+    }
+
+    /**
+     * Removes all values from the input array at the specified indices.
+     *
+     * @param input the input array
+     * @param indices a set containing the indices to be removed
+     * @return the input array without the values at the specified indices
+     */
+    private double[] removeValues(final double[] input, final Set<Integer> indices) {
+        if (indices.isEmpty()) {
+            return input;
+        }
+        final double[] result = new double[input.length - indices.size()];
+        for (int i = 0, j = 0; i < input.length; i++) {
+            if (!indices.contains(i)) {
+                result[j++] = input[i];
+            }
         }
+        return result;
     }
 }

Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java Wed Mar 27 19:44:22 2013
@@ -18,7 +18,10 @@ package org.apache.commons.math3.stat.co
 
 import org.apache.commons.math3.TestUtils;
 import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.linear.MatrixUtils;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
+import org.apache.commons.math3.stat.ranking.NaturalRanking;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -118,6 +121,35 @@ public class SpearmansRankCorrelationTes
                 new SpearmansCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
     }
 
+    @Test
+    public void testMath891Array() {
+        final double[] xArray = new double[] { Double.NaN, 1.9, 2, 100, 3 };
+        final double[] yArray = new double[] { 10, 2, 10, Double.NaN, 4 };
+
+        NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED);
+        SpearmansCorrelation spearman = new SpearmansCorrelation(ranking);
+        
+        Assert.assertEquals(0.5, spearman.correlation(xArray, yArray), Double.MIN_VALUE);
+    }
+
+    @Test
+    public void testMath891Matrix() {
+        final double[] xArray = new double[] { Double.NaN, 1.9, 2, 100, 3 };
+        final double[] yArray = new double[] { 10, 2, 10, Double.NaN, 4 };
+
+        RealMatrix matrix = MatrixUtils.createRealMatrix(xArray.length, 2);
+        for (int i = 0; i < xArray.length; i++) {
+            matrix.addToEntry(i, 0, xArray[i]);
+            matrix.addToEntry(i, 1, yArray[i]);
+        }
+
+        // compute correlation
+        NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED);
+        SpearmansCorrelation spearman = new SpearmansCorrelation(matrix, ranking);
+        
+        Assert.assertEquals(0.5, spearman.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
+    }
+
     // Not relevant here
     @Override
     @Test