You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by tn...@apache.org on 2013/03/27 20:44:22 UTC
svn commit: r1461822 - in /commons/proper/math/trunk/src: changes/changes.xml
main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java
Author: tn
Date: Wed Mar 27 19:44:22 2013
New Revision: 1461822
URL: http://svn.apache.org/r1461822
Log:
[MATH-891] SpearmansCorrelation now works correctly when provided with a NaturalRanking with NaNStrategy.REMOVED.
Modified:
commons/proper/math/trunk/src/changes/changes.xml
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java
Modified: commons/proper/math/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/changes/changes.xml?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/changes/changes.xml (original)
+++ commons/proper/math/trunk/src/changes/changes.xml Wed Mar 27 19:44:22 2013
@@ -55,6 +55,12 @@ This is a minor release: It combines bug
Changes to existing features were made in a backwards-compatible
way such as to allow drop-in replacement of the v3.1[.1] JAR file.
">
+ <action dev="tn" type="fix" issue="MATH-891">
+ "SpearmansCorrelation" now works correctly in case of a provided
+ "NaturalRanking" with a "NaNStrategy.REMOVED" strategy and the input
+ data contains NaN values. From version 4.0 onwards this strategy will
+ not be supported anymore.
+ </action>
<action dev="erans" type="update" issue="MATH-956">
Replaced hard-coded numbers in "LevenbergMarquardtOptimizer".
</action>
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java Wed Mar 27 19:44:22 2013
@@ -17,27 +17,32 @@
package org.apache.commons.math3.stat.correlation;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.linear.BlockRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.apache.commons.math3.stat.ranking.RankingAlgorithm;
/**
- * <p>Spearman's rank correlation. This implementation performs a rank
+ * Spearman's rank correlation. This implementation performs a rank
* transformation on the input data and then computes {@link PearsonsCorrelation}
- * on the ranked data.</p>
- *
- * <p>By default, ranks are computed using {@link NaturalRanking} with default
+ * on the ranked data.
+ * <p>
+ * By default, ranks are computed using {@link NaturalRanking} with default
* strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
- * The ranking algorithm can be set using a constructor argument.</p>
+ * The ranking algorithm can be set using a constructor argument.
*
* @since 2.0
* @version $Id$
*/
-
public class SpearmansCorrelation {
/** Input data */
@@ -58,6 +63,9 @@ public class SpearmansCorrelation {
/**
* Create a SpearmansCorrelation with the given ranking algorithm.
+ * <p>
+ * From version 4.0 onwards this constructor will throw an exception
+ * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
*
* @param rankingAlgorithm ranking algorithm
* @since 3.1
@@ -81,15 +89,17 @@ public class SpearmansCorrelation {
/**
* Create a SpearmansCorrelation with the given input data matrix
* and ranking algorithm.
+ * <p>
+ * From version 4.0 onwards this constructor will throw an exception
+ * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
*
* @param dataMatrix matrix of data with columns representing
* variables to correlate
* @param rankingAlgorithm ranking algorithm
*/
public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
- this.data = dataMatrix.copy();
this.rankingAlgorithm = rankingAlgorithm;
- rankTransform(data);
+ this.data = rankTransform(dataMatrix);
rankCorrelation = new PearsonsCorrelation(data);
}
@@ -125,9 +135,8 @@ public class SpearmansCorrelation {
* @param matrix matrix with columns representing variables to correlate
* @return correlation matrix
*/
- public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
- RealMatrix matrixCopy = matrix.copy();
- rankTransform(matrixCopy);
+ public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
+ final RealMatrix matrixCopy = rankTransform(matrix);
return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
}
@@ -139,7 +148,7 @@ public class SpearmansCorrelation {
* @param matrix matrix with columns representing variables to correlate
* @return correlation matrix
*/
- public RealMatrix computeCorrelationMatrix(double[][] matrix) {
+ public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
return computeCorrelationMatrix(new BlockRealMatrix(matrix));
}
@@ -159,20 +168,93 @@ public class SpearmansCorrelation {
throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
xArray.length, 2);
} else {
- return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
- rankingAlgorithm.rank(yArray));
+ double[] x = xArray;
+ double[] y = yArray;
+ if (rankingAlgorithm instanceof NaturalRanking &&
+ NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
+ final Set<Integer> nanPositions = new HashSet<Integer>();
+
+ nanPositions.addAll(getNaNPositions(xArray));
+ nanPositions.addAll(getNaNPositions(yArray));
+
+ x = removeValues(xArray, nanPositions);
+ y = removeValues(yArray, nanPositions);
+ }
+ return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(x), rankingAlgorithm.rank(y));
}
}
/**
* Applies rank transform to each of the columns of <code>matrix</code>
- * using the current <code>rankingAlgorithm</code>
+ * using the current <code>rankingAlgorithm</code>.
*
* @param matrix matrix to transform
+ * @return a rank-transformed matrix
+ */
+ private RealMatrix rankTransform(final RealMatrix matrix) {
+ RealMatrix transformed = null;
+
+ if (rankingAlgorithm instanceof NaturalRanking &&
+ ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) {
+ final Set<Integer> nanPositions = new HashSet<Integer>();
+ for (int i = 0; i < matrix.getColumnDimension(); i++) {
+ nanPositions.addAll(getNaNPositions(matrix.getColumn(i)));
+ }
+
+ // if we have found NaN values, we have to update the matrix size
+ if (!nanPositions.isEmpty()) {
+ transformed = new BlockRealMatrix(matrix.getRowDimension() - nanPositions.size(),
+ matrix.getColumnDimension());
+ for (int i = 0; i < transformed.getColumnDimension(); i++) {
+ transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions));
+ }
+ }
+ }
+
+ if (transformed == null) {
+ transformed = matrix.copy();
+ }
+
+ for (int i = 0; i < transformed.getColumnDimension(); i++) {
+ transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
+ }
+
+ return transformed;
+ }
+
+ /**
+ * Returns a list containing the indices of NaN values in the input array.
+ *
+ * @param input the input array
+ * @return a list of NaN positions in the input array
*/
- private void rankTransform(RealMatrix matrix) {
- for (int i = 0; i < matrix.getColumnDimension(); i++) {
- matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
+ private List<Integer> getNaNPositions(final double[] input) {
+ final List<Integer> positions = new ArrayList<Integer>();
+ for (int i = 0; i < input.length; i++) {
+ if (Double.isNaN(input[i])) {
+ positions.add(i);
+ }
+ }
+ return positions;
+ }
+
+ /**
+ * Removes all values from the input array at the specified indices.
+ *
+ * @param input the input array
+ * @param indices a set containing the indices to be removed
+ * @return the input array without the values at the specified indices
+ */
+ private double[] removeValues(final double[] input, final Set<Integer> indices) {
+ if (indices.isEmpty()) {
+ return input;
+ }
+ final double[] result = new double[input.length - indices.size()];
+ for (int i = 0, j = 0; i < input.length; i++) {
+ if (!indices.contains(i)) {
+ result[j++] = input[i];
+ }
}
+ return result;
}
}
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java?rev=1461822&r1=1461821&r2=1461822&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/correlation/SpearmansRankCorrelationTest.java Wed Mar 27 19:44:22 2013
@@ -18,7 +18,10 @@ package org.apache.commons.math3.stat.co
import org.apache.commons.math3.TestUtils;
import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
+import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.junit.Assert;
import org.junit.Test;
@@ -118,6 +121,35 @@ public class SpearmansRankCorrelationTes
new SpearmansCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
}
+ @Test
+ public void testMath891Array() {
+ final double[] xArray = new double[] { Double.NaN, 1.9, 2, 100, 3 };
+ final double[] yArray = new double[] { 10, 2, 10, Double.NaN, 4 };
+
+ NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED);
+ SpearmansCorrelation spearman = new SpearmansCorrelation(ranking);
+
+ Assert.assertEquals(0.5, spearman.correlation(xArray, yArray), Double.MIN_VALUE);
+ }
+
+ @Test
+ public void testMath891Matrix() {
+ final double[] xArray = new double[] { Double.NaN, 1.9, 2, 100, 3 };
+ final double[] yArray = new double[] { 10, 2, 10, Double.NaN, 4 };
+
+ RealMatrix matrix = MatrixUtils.createRealMatrix(xArray.length, 2);
+ for (int i = 0; i < xArray.length; i++) {
+ matrix.addToEntry(i, 0, xArray[i]);
+ matrix.addToEntry(i, 1, yArray[i]);
+ }
+
+ // compute correlation
+ NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED);
+ SpearmansCorrelation spearman = new SpearmansCorrelation(matrix, ranking);
+
+ Assert.assertEquals(0.5, spearman.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
+ }
+
// Not relevant here
@Override
@Test