You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/12/06 16:37:31 UTC
[commons-statistics] branch master updated: STATISTICS-63: Add ranking module
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
The following commit(s) were added to refs/heads/master by this push:
new 018b1a8 STATISTICS-63: Add ranking module
018b1a8 is described below
commit 018b1a8401c6165a0b876c5814121e09ae4757e3
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Sat Dec 3 19:02:55 2022 +0000
STATISTICS-63: Add ranking module
Port o.a.c.math4.stat.ranking to a new ranking module.
---
commons-statistics-examples/examples-jmh/pom.xml | 6 +
.../jmh/ranking/NaturalRankingPerformance.java | 304 ++++++++++
.../examples/jmh/ranking/package-info.java | 21 +
.../jmh/ranking/NaturalRankingPerformanceTest.java | 77 +++
commons-statistics-examples/pom.xml | 5 +
commons-statistics-ranking/LICENSE | 201 +++++++
commons-statistics-ranking/NOTICE | 5 +
commons-statistics-ranking/README.md | 105 ++++
commons-statistics-ranking/pom.xml | 73 +++
.../commons/statistics/ranking/NaNStrategy.java | 50 ++
.../commons/statistics/ranking/NaturalRanking.java | 582 ++++++++++++++++++++
.../statistics/ranking/RankingAlgorithm.java | 42 ++
.../commons/statistics/ranking/TiesStrategy.java | 68 +++
.../commons/statistics/ranking/package-info.java | 21 +
.../src/site/resources/profile.jacoco | 17 +
commons-statistics-ranking/src/site/site.xml | 38 ++
commons-statistics-ranking/src/site/xdoc/index.xml | 52 ++
.../statistics/ranking/NaturalRankingTest.java | 611 +++++++++++++++++++++
.../commons/statistics/ranking/UserGuideTest.java | 34 ++
pom.xml | 1 +
src/conf/pmd/pmd-ruleset.xml | 14 +
src/conf/spotbugs/spotbugs-exclude-filter.xml | 5 +
22 files changed, 2332 insertions(+)
diff --git a/commons-statistics-examples/examples-jmh/pom.xml b/commons-statistics-examples/examples-jmh/pom.xml
index 2e4abed..cfa3ab8 100644
--- a/commons-statistics-examples/examples-jmh/pom.xml
+++ b/commons-statistics-examples/examples-jmh/pom.xml
@@ -36,6 +36,11 @@
<artifactId>commons-statistics-distribution</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-statistics-ranking</artifactId>
+ </dependency>
+
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-simple</artifactId>
@@ -204,6 +209,7 @@
</goals>
<configuration>
<finalName>${uberjar.name}</finalName>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>${project.mainClass}</mainClass>
diff --git a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformance.java b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformance.java
new file mode 100644
index 0000000..923a56b
--- /dev/null
+++ b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformance.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.examples.jmh.ranking;
+
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import java.util.function.UnaryOperator;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.distribution.DirichletSampler;
+import org.apache.commons.rng.simple.RandomSource;
+import org.apache.commons.statistics.ranking.NaturalRanking;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Executes a benchmark of the ranking of {@code double} array data.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"})
+public class NaturalRankingPerformance {
+ /**
+ * Source of {@code double} array data to rank.
+ */
+ @State(Scope.Benchmark)
+ public static class DataSource {
+ /** Data length. */
+ @Param({"10000"})
+ private int length;
+ /** Fraction of total length that has tied (equal) data. */
+ @Param({"0", "0.1", "0.5"})
+ private double tieFraction;
+ /** Count of the number of distinct runs of tied (equal) data. */
+ @Param({"20"})
+ private int ties;
+ /** Concentration parameter for the distribution of tie lengths. */
+ @Param({"1"})
+ private double alpha;
+
+ /** Data to rank. */
+ private double[] data;
+
+ /**
+ * @return the data
+ */
+ public double[] getData() {
+ return data;
+ }
+
+ /**
+ * Create the data to rank.
+ */
+ @Setup(Level.Iteration)
+ public void setup() {
+ // Data will be randomized per iteration
+ final UniformRandomProvider rng = RandomSource.XO_RO_SHI_RO_128_PP.create();
+ data = createData(rng, length, tieFraction, ties, alpha);
+ }
+
+ /**
+ * Creates the data.
+ * This is package-private for testing.
+ *
+ * @param rng Source of randomness
+ * @param length Data length.
+ * @param tieFraction Fraction of total length that has tied (equal) data.
+ * @param ties Count of the number of distinct runs of tied (equal) data.
+ * @param alpha Concentration parameter for the distribution of tie lengths.
+ * @return the data
+ */
+ static double[] createData(UniformRandomProvider rng,
+ int length, double tieFraction, int ties, double alpha) {
+ assert length > 0 : "Invalid data length";
+ assert tieFraction <= 1 && tieFraction >= 0 : "Invalid tie fraction";
+ assert ties >= 0 : "Invalid number of ties";
+ assert alpha >= 0 : "Invalid concentration parameter";
+
+ // The data will contain n regions of data, each with the same value,
+ // then the rest is a sequence. This is then shuffled.
+ final double[] data = new double[length];
+ int count = 0;
+ int value = 0;
+
+ // Create tie regions
+ final int tiesLength = (int) Math.round(tieFraction * length);
+ if (ties > 0 && tiesLength > 0) {
+ // Cut the ties length into parts.
+ // Note that due to randomness some lengths may be <= 1 and therefore not a tie.
+ // This increasingly occurs as alpha -> 0.
+ // See: https://en.wikipedia.org/wiki/Dirichlet_distribution#String_cutting
+ final double[] tieFractions = DirichletSampler.symmetric(rng, ties, alpha).sample();
+ final int[] lengths = Arrays.stream(tieFractions)
+ .mapToInt(f -> (int) Math.round(f * tiesLength))
+ .toArray();
+ for (final int len : lengths) {
+ ++value;
+ // Lengths may sum to more than tiesLength due to rounding so
+ // consume most we can
+ for (int i = Math.min(len, tiesLength - count); i > 0; i--) {
+ data[count++] = value;
+ }
+ }
+ }
+
+ // Fill remaining values
+ while (count < data.length) {
+ data[count++] = ++value;
+ }
+
+ // Fisher-Yates shuffle
+ for (int i = data.length; i > 1; i--) {
+ swap(data, i - 1, rng.nextInt(i));
+ }
+
+ return data;
+ }
+
+ /**
+ * Swaps the two specified elements in the specified array.
+ *
+ * @param array Data array
+ * @param i First index
+ * @param j Second index
+ */
+ private static void swap(double[] array, int i, int j) {
+ final double tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ /**
+ * Source of a function to rank {@code double} array data.
+ */
+ @State(Scope.Benchmark)
+ public static class RankingSource {
+ /** Name for baseline implementation. */
+ private static final String METHOD_BASELINE = "baseline";
+ /** Name for Commons Statistics implementation. */
+ private static final String METHOD_STATISTICS = "statistics";
+ /** Name for Commons Math3 implementation. */
+ private static final String METHOD_MATH3 = "math3";
+ /** The ranking method. */
+ @Param({"baseline", METHOD_STATISTICS, METHOD_MATH3})
+ private String ranking;
+
+ /** The ranking function. */
+ private UnaryOperator<double[]> fun;
+
+ /**
+ * @return the ranking function
+ */
+ public UnaryOperator<double[]> getFunction() {
+ return fun;
+ }
+
+ /**
+ * Create the ranking function.
+ */
+ @Setup(Level.Iteration)
+ public void setup() {
+ fun = createFunction(ranking);
+ }
+
+ /**
+ * Creates the ranking function.
+ * This is package-private for testing.
+ *
+ * @param name The function name.
+ * @return the ranking function
+ */
+ static UnaryOperator<double[]> createFunction(String name) {
+ if (METHOD_BASELINE.equals(name)) {
+ return new SortRanking();
+ } else if (METHOD_STATISTICS.equals(name)) {
+ return new NaturalRanking();
+ } else if (METHOD_MATH3.equals(name)) {
+ return new org.apache.commons.math3.stat.ranking.NaturalRanking()::rank;
+ } else {
+ throw new IllegalStateException("Unknown method: " + name);
+ }
+ }
+
+ /**
+ * Gets the names for valid ranking functions.
+ * This is package-private for testing.
+ *
+ * @return the function names
+ */
+ static String[] getFunctionNames() {
+ // Do not return the baseline method
+ return new String[] {METHOD_STATISTICS, METHOD_MATH3};
+ }
+
+ /**
+ * Class to create a ranking using a sort of the data. This ranking does not
+ * resolve ties and is a baseline for the speed of {@link Arrays#sort(Object[])}.
+ */
+ private static class SortRanking implements UnaryOperator<double[]> {
+ @Override
+ public double[] apply(double[] in) {
+ final DataPosition[] data = new DataPosition[in.length];
+ for (int i = 0; i < in.length; i++) {
+ data[i] = new DataPosition(in[i], i);
+ }
+ Arrays.sort(data);
+ final double[] out = new double[in.length];
+ for (int i = 0; i < in.length; i++) {
+ out[data[i].getPosition()] = i + 1.0;
+ }
+ return out;
+ }
+
+ // Copied from NaturalRanking for baseline equivalence.
+
+ /**
+ * Represents the position of a {@code double} value in a data array. The
+ * Comparable interface is implemented so Arrays.sort can be used to sort an
+ * array of data positions by value. Note that the implicitly defined natural
+ * ordering is NOT consistent with equals.
+ */
+ private static class DataPosition implements Comparable<DataPosition> {
+ /** Data value. */
+ private final double value;
+ /** Data position. */
+ private final int position;
+
+ /**
+ * Create an instance with the given value and position.
+ *
+ * @param value Data value.
+ * @param position Data position.
+ */
+ DataPosition(double value, int position) {
+ this.value = value;
+ this.position = position;
+ }
+
+ /**
+ * Compare this value to another.
+ * Only the <strong>values</strong> are compared.
+ *
+ * @param other the other pair to compare this to
+ * @return result of {@code Double.compare(value, other.value)}
+ */
+ @Override
+ public int compareTo(DataPosition other) {
+ return Double.compare(value, other.value);
+ }
+
+ // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
+
+ /**
+ * Returns the data position.
+ *
+ * @return position
+ */
+ int getPosition() {
+ return position;
+ }
+ }
+ }
+ }
+
+ /**
+ * Rank {@code double} array data.
+ *
+ * @param source Source of the data.
+ * @param ranking Source of the ranking function.
+ * @return the ranking
+ */
+ @Benchmark
+ public double[] sample(DataSource source, RankingSource ranking) {
+ return ranking.getFunction().apply(source.getData());
+ }
+}
diff --git a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/package-info.java b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/package-info.java
new file mode 100644
index 0000000..9f62bd8
--- /dev/null
+++ b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/ranking/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Benchmarks for the {@code org.apache.commons.statistics.ranking} components.
+ */
+package org.apache.commons.statistics.examples.jmh.ranking;
diff --git a/commons-statistics-examples/examples-jmh/src/test/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformanceTest.java b/commons-statistics-examples/examples-jmh/src/test/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformanceTest.java
new file mode 100644
index 0000000..3cd885a
--- /dev/null
+++ b/commons-statistics-examples/examples-jmh/src/test/java/org/apache/commons/statistics/examples/jmh/ranking/NaturalRankingPerformanceTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.examples.jmh.ranking;
+
+import java.util.function.UnaryOperator;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.simple.RandomSource;
+import org.apache.commons.statistics.examples.jmh.ranking.NaturalRankingPerformance.DataSource;
+import org.apache.commons.statistics.examples.jmh.ranking.NaturalRankingPerformance.RankingSource;
+import org.apache.commons.statistics.ranking.NaturalRanking;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+/**
+ * Executes tests for {@link NaturalRankingPerformance}.
+ */
+class NaturalRankingPerformanceTest {
+ /**
+ * Test the ranking of data from the data source.
+ *
+ * <p>Each known method in the benchmark is tested to: create the same ranking;
+ * and not destructively modify the data. Violation of these assumptions will
+ * invalidate the performance benchmark.
+ */
+ @ParameterizedTest
+ @CsvSource({
+ "100, 0, 0, 1",
+ "100, 0.5, 5, 1",
+ "100, 1.0, 5, 0.5",
+ "100, 0.25, 5, 2",
+ })
+ void testDataSource(int length, double tieFraction, int ties, double alpha) {
+ final UniformRandomProvider rng = RandomSource.XO_RO_SHI_RO_128_PP.create();
+ final double[] data = DataSource.createData(rng, length, tieFraction, ties, alpha);
+
+ final double[] original = data.clone();
+ double[] ranking = null;
+ for (final String name : RankingSource.getFunctionNames()) {
+ final double[] r = RankingSource.createFunction(name).apply(data);
+ Assertions.assertArrayEquals(original, data, () -> name + " destroyed the data");
+ if (ranking == null) {
+ ranking = r;
+ } else {
+ Assertions.assertArrayEquals(ranking, r, () -> name + " has a different ranking");
+ }
+ }
+ }
+
+ /**
+ * Demonstrate the Commons Math3 bug that has been fixed in statistics.
+ */
+ @Test
+ void testNoData() {
+ final double[] empty = {};
+ final UnaryOperator<double[]> fun1 = new org.apache.commons.math3.stat.ranking.NaturalRanking()::rank;
+ Assertions.assertThrows(ArrayIndexOutOfBoundsException.class, () -> fun1.apply(empty));
+ final UnaryOperator<double[]> fun2 = new NaturalRanking();
+ Assertions.assertArrayEquals(empty, fun2.apply(empty));
+ }
+}
diff --git a/commons-statistics-examples/pom.xml b/commons-statistics-examples/pom.xml
index f3690f8..d95611b 100644
--- a/commons-statistics-examples/pom.xml
+++ b/commons-statistics-examples/pom.xml
@@ -60,6 +60,11 @@
<artifactId>commons-statistics-distribution</artifactId>
<version>1.1-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-statistics-ranking</artifactId>
+ <version>1.1-SNAPSHOT</version>
+ </dependency>
<dependency>
<groupId>info.picocli</groupId>
<artifactId>picocli</artifactId>
diff --git a/commons-statistics-ranking/LICENSE b/commons-statistics-ranking/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/commons-statistics-ranking/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/commons-statistics-ranking/NOTICE b/commons-statistics-ranking/NOTICE
new file mode 100644
index 0000000..5c79569
--- /dev/null
+++ b/commons-statistics-ranking/NOTICE
@@ -0,0 +1,5 @@
+Apache Commons Statistics
+Copyright 2018-2022 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
diff --git a/commons-statistics-ranking/README.md b/commons-statistics-ranking/README.md
new file mode 100644
index 0000000..5d1e5f4
--- /dev/null
+++ b/commons-statistics-ranking/README.md
@@ -0,0 +1,105 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!---
+ +======================================================================+
+ |**** ****|
+ |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****|
+ |**** DO NOT EDIT DIRECTLY ****|
+ |**** ****|
+ +======================================================================+
+ | TEMPLATE FILE: readme-md-template.md |
+ | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates |
+ +======================================================================+
+ | |
+ | 1) Re-generate using: mvn commons-build:readme-md |
+ | |
+ | 2) Set the following properties in the component's pom: |
+ | - commons.componentid (required, alphabetic, lower case) |
+ | - commons.release.version (required) |
+ | |
+ | 3) Example Properties |
+ | |
+ | <properties> |
+ | <commons.componentid>math</commons.componentid> |
+ | <commons.release.version>1.2</commons.release.version> |
+ | </properties> |
+ | |
+ +======================================================================+
+--->
+Apache Commons Statistics Ranking
+===================
+
+[![Build Status](https://github.com/apache/commons-statistics/actions/workflows/maven.yml/badge.svg)](https://github.com/apache/commons-statistics/actions/workflows/maven.yml)
+[![Coverage Status](https://codecov.io/gh/apache/commons-statistics/branch/master/graph/badge.svg)](https://app.codecov.io/gh/apache/commons-statistics)
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-statistics-ranking/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-statistics-ranking/)
+[![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)
+
+Statistical rank transformations.
+
+Documentation
+-------------
+
+More information can be found on the [Apache Commons Statistics Ranking Homepage](https://commons.apache.org/proper/commons-statistics).
+The [Javadoc](https://commons.apache.org/proper/commons-statistics/javadocs/api-release) can be browsed.
+Questions related to the usage of Apache Commons Statistics Ranking should be posted to the [user mailing list][ml].
+
+Where can I get the latest release?
+-----------------------------------
+You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-statistics/download_statistics.cgi).
+
+Alternatively you can pull it from the central Maven repositories:
+
+```xml
+<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-statistics-ranking</artifactId>
+ <version>1.0</version>
+</dependency>
+```
+
+Contributing
+------------
+
+We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors.
+There are some guidelines which will make applying PRs easier for us:
++ No tabs! Please use spaces for indentation.
++ Respect the code style.
++ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change.
++ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn```.
+
+If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas).
+You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md).
+
+License
+-------
+This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0).
+
+See the `NOTICE` file for required notices and attributions.
+
+Donations
+---------
+You like Apache Commons Statistics Ranking? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development.
+
+Additional Resources
+--------------------
+
++ [Apache Commons Homepage](https://commons.apache.org/)
++ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/STATISTICS)
++ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
++ `#apache-commons` IRC channel on `irc.freenode.org`
+
+[ml]:https://commons.apache.org/mail-lists.html
diff --git a/commons-statistics-ranking/pom.xml b/commons-statistics-ranking/pom.xml
new file mode 100644
index 0000000..91a1ecb
--- /dev/null
+++ b/commons-statistics-ranking/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-statistics-parent</artifactId>
+ <version>1.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>commons-statistics-ranking</artifactId>
+ <version>1.1-SNAPSHOT</version>
+ <name>Apache Commons Statistics Ranking</name>
+
+ <description>Statistical rank transformations.</description>
+
+ <properties>
+ <!-- OSGi -->
+ <commons.osgi.symbolicName>org.apache.commons.statistics.ranking</commons.osgi.symbolicName>
+ <commons.osgi.export>org.apache.commons.statistics.ranking</commons.osgi.export>
+ <!-- Java 9+ -->
+ <commons.module.name>org.apache.commons.statistics.ranking</commons.module.name>
+ <!-- Workaround to avoid duplicating config files. -->
+ <statistics.parent.dir>${basedir}/..</statistics.parent.dir>
+ <statistics.jira.component>ranking</statistics.jira.component>
+ </properties>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-rng-client-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-rng-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-rng-sampling</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+
+</project>
diff --git a/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaNStrategy.java b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaNStrategy.java
new file mode 100644
index 0000000..de7c326
--- /dev/null
+++ b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaNStrategy.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.ranking;
+
+/**
+ * Strategies for handling {@link Double#NaN} values in rank transformations.
+ *
+ * @since 1.1
+ */
+public enum NaNStrategy {
+
+ /**
+ * NaNs are considered minimal in the ordering, equivalent to (that is, tied
+ * with) {@link Double#NEGATIVE_INFINITY}.
+ */
+ MINIMAL,
+
+ /**
+ * NaNs are considered maximal in the ordering, equivalent to (that is, tied
+ * with) {@link Double#POSITIVE_INFINITY}.
+ */
+ MAXIMAL,
+
+ /** NaNs are removed before rank transform is applied. */
+ REMOVED,
+
+ /**
+ * NaNs are left fixed "in place", that is the rank transformation is applied to
+ * the other elements in the input array, but the NaN elements are returned
+ * unchanged.
+ */
+ FIXED,
+
+ /** NaNs result in an exception. */
+ FAILED
+}
diff --git a/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaturalRanking.java b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaturalRanking.java
new file mode 100644
index 0000000..c729bd1
--- /dev/null
+++ b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/NaturalRanking.java
@@ -0,0 +1,582 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.ranking;
+
+import java.util.Arrays;
+import java.util.SplittableRandom;
+import java.util.function.DoubleUnaryOperator;
+import java.util.function.LongSupplier;
+import org.apache.commons.rng.UniformRandomProvider;
+
+/**
+ * Ranking based on the natural ordering on floating-point values.
+ *
+ * <p>{@link Double#NaN NaNs} are treated according to the configured
+ * {@link NaNStrategy} and ties are handled using the selected
+ * {@link TiesStrategy}. Configuration settings are supplied in optional
+ * constructor arguments. Defaults are {@link NaNStrategy#FAILED} and
+ * {@link TiesStrategy#AVERAGE}, respectively.
+ *
+ * <p>When using {@link TiesStrategy#RANDOM}, a generator of random bits may be
+ * supplied as a {@link LongSupplier} argument; otherwise a default is created
+ * on-demand. The source of randomness can be supplied using a method reference.
+ * The following example creates a ranking with NaN values with the highest
+ * ranking and ties resolved randomly:
+ *
+ * <pre>
+ * NaturalRanking ranking = new NaturalRanking(NaNStrategy.MAXIMAL,
+ * new SplittableRandom()::nextLong);
+ * </pre>
+ *
+ * <p>Note: Using {@link TiesStrategy#RANDOM} is not thread-safe due to the mutable
+ * generator of randomness. Instances not using random resolution of ties are
+ * thread-safe.
+ *
+ * <p>Examples:
+ *
+ * <table border="">
+ * <caption>Examples</caption>
+ * <tr><th colspan="3">
+ * Input data: [20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17]
+ * </th></tr>
+ * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
+ * <th>{@code rank(data)}</th>
+ * <tr>
+ * <td>MAXIMAL</td>
+ * <td>default (ties averaged)</td>
+ * <td>[5, 3, 6, 7, 3, 8, 9, 1, 3]</td></tr>
+ * <tr>
+ * <td>MAXIMAL</td>
+ * <td>MINIMUM</td>
+ * <td>[5, 2, 6, 7, 2, 8, 9, 1, 2]</td></tr>
+ * <tr>
+ * <td>MINIMAL</td>
+ * <td>default (ties averaged]</td>
+ * <td>[6, 4, 7, 8, 4, 9, 1.5, 1.5, 4]</td></tr>
+ * <tr>
+ * <td>REMOVED</td>
+ * <td>SEQUENTIAL</td>
+ * <td>[5, 2, 6, 7, 3, 8, 1, 4]</td></tr>
+ * <tr>
+ * <td>MINIMAL</td>
+ * <td>MAXIMUM</td>
+ * <td>[6, 5, 7, 8, 5, 9, 2, 2, 5]</td></tr>
+ * <tr>
+ * <td>MINIMAL</td>
+ * <td>MAXIMUM</td>
+ * <td>[6, 5, 7, 8, 5, 9, 2, 2, 5]</td></tr>
+ * </table>
+ *
+ * @since 1.1
+ */
+public class NaturalRanking implements RankingAlgorithm {
+ /** Default NaN strategy. */
+ private static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
+ /** Default ties strategy. */
+ private static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
+ /** Map values to positive infinity. */
+ private static final DoubleUnaryOperator ACTION_POS_INF = x -> Double.POSITIVE_INFINITY;
+ /** Map values to negative infinity. */
+ private static final DoubleUnaryOperator ACTION_NEG_INF = x -> Double.NEGATIVE_INFINITY;
+ /** Raise an exception for values. */
+ private static final DoubleUnaryOperator ACTION_ERROR = new DoubleUnaryOperator() {
+ @Override
+ public double applyAsDouble(double operand) {
+ throw new IllegalArgumentException("Invalid data: " + operand);
+ }
+ };
+
+ /** NaN strategy. */
+ private final NaNStrategy nanStrategy;
+ /** Ties strategy. */
+ private final TiesStrategy tiesStrategy;
+ /** Source of randomness when ties strategy is RANDOM.
+ * Can be null to default to a JDK implementation. */
+ private final LongSupplier randomSource;
+ /** Random generator created on demand when ties strategy is RANDOM. */
+ private UniformRandomProvider rng;
+
+ /**
+ * Creates an instance with {@link NaNStrategy#FAILED} and
+ * {@link TiesStrategy#AVERAGE}.
+ */
+ public NaturalRanking() {
+ this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
+ }
+
+ /**
+ * Creates an instance with {@link NaNStrategy#FAILED} and the
+ * specified @{@code tiesStrategy}.
+ *
+ * <p>If the ties strategy is {@link TiesStrategy#RANDOM RANDOM} a default
+ * source of randomness is used to resolve ties.
+ *
+ * @param tiesStrategy TiesStrategy to use.
+ */
+ public NaturalRanking(TiesStrategy tiesStrategy) {
+ this(DEFAULT_NAN_STRATEGY, tiesStrategy, null);
+ }
+
+ /**
+ * Creates an instance with the specified @{@code nanStrategy} and
+ * {@link TiesStrategy#AVERAGE}.
+ *
+ * @param nanStrategy NaNStrategy to use.
+ */
+ public NaturalRanking(NaNStrategy nanStrategy) {
+ this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
+ }
+
+ /**
+ * Creates an instance with the specified @{@code nanStrategy} and the
+ * specified @{@code tiesStrategy}.
+ *
+ * <p>If the ties strategy is {@link TiesStrategy#RANDOM RANDOM} a default
+ * source of randomness is used to resolve ties.
+ *
+ * @param nanStrategy NaNStrategy to use.
+ * @param tiesStrategy TiesStrategy to use.
+ */
+ public NaturalRanking(NaNStrategy nanStrategy,
+ TiesStrategy tiesStrategy) {
+ this(nanStrategy, tiesStrategy, null);
+ }
+
+ /**
+ * Creates an instance with {@link NaNStrategy#FAILED},
+ * {@link TiesStrategy#RANDOM} and the given the source of random data.
+ *
+ * @param randomSource Source of random data.
+ */
+ public NaturalRanking(LongSupplier randomSource) {
+ this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomSource);
+ }
+
+ /**
+ * Creates an instance with the specified @{@code nanStrategy},
+ * {@link TiesStrategy#RANDOM} and the given the source of random data.
+ *
+ * @param nanStrategy NaNStrategy to use.
+ * @param randomSource Source of random data.
+ */
+ public NaturalRanking(NaNStrategy nanStrategy,
+ LongSupplier randomSource) {
+ this(nanStrategy, TiesStrategy.RANDOM, randomSource);
+ }
+
+ /**
+ * @param nanStrategy NaNStrategy to use.
+ * @param tiesStrategy TiesStrategy to use.
+ * @param randomSource Source of random data.
+ */
+ private NaturalRanking(NaNStrategy nanStrategy,
+ TiesStrategy tiesStrategy,
+ LongSupplier randomSource) {
+ this.nanStrategy = nanStrategy;
+ this.tiesStrategy = tiesStrategy;
+ this.randomSource = randomSource;
+ }
+
+ /**
+ * Return the {@link NaNStrategy}.
+ *
+ * @return the strategy for handling NaN
+ */
+ public NaNStrategy getNanStrategy() {
+ return nanStrategy;
+ }
+
+ /**
+ * Return the {@link TiesStrategy}.
+ *
+ * @return the strategy for handling ties
+ */
+ public TiesStrategy getTiesStrategy() {
+ return tiesStrategy;
+ }
+
+ /**
+ * Rank {@code data} using the natural ordering on floating-point values, with
+ * NaN values handled according to {@code nanStrategy} and ties resolved using
+ * {@code tiesStrategy}.
+ *
+ * @throws IllegalArgumentException if the selected {@link NaNStrategy} is
+ * {@code FAILED} and a {@link Double#NaN} is encountered in the input data.
+ */
+ @Override
+ public double[] apply(double[] data) {
+ // Convert data for sorting.
+ // NaNs are counted for the FIXED strategy.
+ final int[] nanCount = {0};
+ final DataPosition[] ranks = createRankData(data, nanCount);
+
+ // Sorting will move NaNs to the end and we do not have to resolve ties in them.
+ final int nonNanSize = ranks.length - nanCount[0];
+
+ // Edge case for empty data
+ if (nonNanSize == 0) {
+ // Either NaN are left in-place or removed
+ return nanStrategy == NaNStrategy.FIXED ? data : new double[0];
+ }
+
+ Arrays.sort(ranks);
+
+ // Walk the sorted array, filling output array using sorted positions,
+ // resolving ties as we go.
+ int pos = 1;
+ final double[] out = new double[ranks.length];
+
+ DataPosition current = ranks[0];
+ out[current.getPosition()] = pos;
+
+ // Store all previous elements of a tie.
+ // Note this lags behind the length of the tie sequence by 1.
+ // In the event there are no ties this is not used.
+ final IntList tiesTrace = new IntList(ranks.length);
+
+ for (int i = 1; i < nonNanSize; i++) {
+ final DataPosition previous = current;
+ current = ranks[i];
+ if (current.compareTo(previous) > 0) {
+ // Check for a previous tie sequence
+ if (tiesTrace.size() != 0) {
+ resolveTie(out, tiesTrace, previous.getPosition());
+ }
+ pos = i + 1;
+ } else {
+ // Tie sequence. Add the matching previous element.
+ tiesTrace.add(previous.getPosition());
+ }
+ out[current.getPosition()] = pos;
+ }
+ // Handle tie sequence at end
+ if (tiesTrace.size() != 0) {
+ resolveTie(out, tiesTrace, current.getPosition());
+ }
+ // For the FIXED strategy consume the remaining NaN elements
+ if (nanStrategy == NaNStrategy.FIXED) {
+ for (int i = nonNanSize; i < ranks.length; i++) {
+ out[ranks[i].getPosition()] = Double.NaN;
+ }
+ }
+ return out;
+ }
+
+ /**
+ * Creates the rank data. If using {@link NaNStrategy#REMOVED} then NaNs are
+ * filtered. Otherwise NaNs may be mapped to an infinite value, counted to allow
+ * subsequent processing, or cause an exception to be thrown.
+ *
+ * @param data Source data.
+ * @param nanCount Output counter for NaN values.
+ * @return the rank data
+ * @throws IllegalArgumentException if the data contains NaN values when using
+ * {@link NaNStrategy#FAILED}.
+ */
+ private DataPosition[] createRankData(double[] data, final int[] nanCount) {
+ return nanStrategy == NaNStrategy.REMOVED ?
+ createNonNaNRankData(data) :
+ createMappedRankData(data, createNaNAction(nanCount));
+ }
+
+ /**
+ * Creates the NaN action.
+ *
+ * @param nanCount Output counter for NaN values.
+ * @return the operator applied to NaN values
+ */
+ private DoubleUnaryOperator createNaNAction(int[] nanCount) {
+ switch (nanStrategy) {
+ case MAXIMAL: // Replace NaNs with +INFs
+ return ACTION_POS_INF;
+ case MINIMAL: // Replace NaNs with -INFs
+ return ACTION_NEG_INF;
+ case REMOVED: // NaNs are removed
+ case FIXED: // NaNs are unchanged
+ // Count the NaNs in the data that must be handled
+ return x -> {
+ nanCount[0]++;
+ return x;
+ };
+ case FAILED:
+ return ACTION_ERROR;
+ default:
+ // this should not happen unless NaNStrategy enum is changed
+ throw new IllegalStateException();
+ }
+ }
+
+ /**
+ * Creates the rank data with NaNs removed.
+ *
+ * @param data Source data.
+ * @return the rank data
+ */
+ private static DataPosition[] createNonNaNRankData(double[] data) {
+ final DataPosition[] ranks = new DataPosition[data.length];
+ int size = 0;
+ for (final double v : data) {
+ if (!Double.isNaN(v)) {
+ ranks[size] = new DataPosition(v, size);
+ size++;
+ }
+ }
+ return size == data.length ? ranks : Arrays.copyOf(ranks, size);
+ }
+
+ /**
+ * Creates the rank data.
+ *
+ * @param data Source data.
+ * @param nanAction Mapping operator applied to NaN values.
+ * @return the rank data
+ */
+ private static DataPosition[] createMappedRankData(double[] data, DoubleUnaryOperator nanAction) {
+ final DataPosition[] ranks = new DataPosition[data.length];
+ for (int i = 0; i < data.length; i++) {
+ double v = data[i];
+ if (Double.isNaN(v)) {
+ v = nanAction.applyAsDouble(v);
+ }
+ ranks[i] = new DataPosition(v, i);
+ }
+ return ranks;
+ }
+
+ /**
+ * Resolve a sequence of ties, using the configured {@link TiesStrategy}. The
+ * input {@code ranks} array is expected to take the same value for all indices
+ * in {@code tiesTrace}. The common value is recoded according to the
+ * tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2], tiesTrace = [2,4,7]
+ * and tiesStrategy is MINIMUM, ranks will be unchanged. The same array and
+ * trace with tiesStrategy AVERAGE will come out [5,8,3,6,3,7,1,3].
+ *
+ * <p>Note: For convenience the final index of the trace is passed as an argument;
+ * it is assumed the list is already non-empty. At the end of the method the
+ * list of indices is cleared.
+ *
+ * @param ranks Array of ranks.
+ * @param tiesTrace List of indices where {@code ranks} is constant, that is,
+ * for any i and j in {@code tiesTrace}: {@code ranks[i] == ranks[j]}.
+ * @param finalIndex The final index to add to the sequence of ties.
+ */
+ private void resolveTie(double[] ranks, IntList tiesTrace, int finalIndex) {
+ tiesTrace.add(finalIndex);
+
+ // Constant value of ranks over tiesTrace.
+ // Note: c is a rank counter starting from 1 so limited to an int.
+ final double c = ranks[tiesTrace.get(0)];
+
+ // length of sequence of tied ranks
+ final int length = tiesTrace.size();
+
+ switch (tiesStrategy) {
+ case AVERAGE: // Replace ranks with average: (lower + upper) / 2
+ fill(ranks, tiesTrace, (2 * c + length - 1) * 0.5);
+ break;
+ case MAXIMUM: // Replace ranks with maximum values
+ fill(ranks, tiesTrace, c + length - 1);
+ break;
+ case MINIMUM: // Replace ties with minimum
+ // Note that the tie sequence already has all values set to c so
+ // no requirement to fill again.
+ break;
+ case SEQUENTIAL: // Fill sequentially from c to c + length - 1
+ case RANDOM: // Fill with randomized sequential values in [c, c + length - 1]
+ // This cast is safe as c is a counter.
+ int r = (int) c;
+ if (tiesStrategy == TiesStrategy.RANDOM) {
+ tiesTrace.shuffle(getRNG());
+ }
+ final int size = tiesTrace.size();
+ for (int i = 0; i < size; i++) {
+ ranks[tiesTrace.get(i)] = r++;
+ }
+ break;
+ default: // this should not happen unless TiesStrategy enum is changed
+ throw new IllegalStateException();
+ }
+
+ tiesTrace.clear();
+ }
+
+ /**
+ * Sets {@code data[i] = value} for each i in {@code tiesTrace}.
+ *
+ * @param data Array to modify.
+ * @param tiesTrace List of index values to set.
+ * @param value Value to set.
+ */
+ private static void fill(double[] data, IntList tiesTrace, double value) {
+ final int size = tiesTrace.size();
+ for (int i = 0; i < size; i++) {
+ data[tiesTrace.get(i)] = value;
+ }
+ }
+
+ /**
+ * Gets the random generator from the source of randomness.
+ * Defaults to a system provided generator if the source of randomness is null.
+ *
+ * @return the RNG
+ */
+ private UniformRandomProvider getRNG() {
+ UniformRandomProvider r = rng;
+ if (r == null) {
+ // Use the provided source, or default to a SplittableRandom
+ final LongSupplier source = randomSource != null ?
+ randomSource :
+ new SplittableRandom()::nextLong;
+ rng = r = source::getAsLong;
+ }
+ return r;
+ }
+
+ /**
+ * An expandable list of int values. This allows tracking array positions
+ * without using boxed values in a {@code List<Integer>}.
+ */
+ private static class IntList {
+ /** The maximum size of array to allocate. */
+ private final int max;
+
+ /** The size of the list. */
+ private int size;
+ /** The list data. Initialised with space to store a tie of 2 values. */
+ private int[] data = new int[2];
+
+ /**
+ * @param max Maximum size of array to allocate. Can use the length of the parent array
+ * for which this is used to track indices.
+ */
+ IntList(int max) {
+ this.max = max;
+ }
+
+ /**
+ * Adds the value to the list.
+ *
+ * @param value the value
+ */
+ void add(int value) {
+ if (size == data.length) {
+ // Overflow safe doubling of the current size.
+ data = Arrays.copyOf(data, (int) Math.min(max, size * 2L));
+ }
+ data[size++] = value;
+ }
+
+ /**
+ * Gets the element at the specified {@code index}.
+ *
+ * @param index Element index
+ * @return the element
+ */
+ int get(int index) {
+ return data[index];
+ }
+
+ /**
+ * Gets the number of elements in the list.
+ *
+ * @return the size
+ */
+ int size() {
+ return size;
+ }
+
+ /**
+ * Clear the list.
+ */
+ void clear() {
+ size = 0;
+ }
+
+ /**
+ * Shuffle the list.
+ *
+ * @param rng Source of randomness
+ */
+ void shuffle(UniformRandomProvider rng) {
+ // Fisher-Yates shuffle
+ final int[] array = data;
+ for (int i = size; i > 1; i--) {
+ swap(array, i - 1, rng.nextInt(i));
+ }
+ }
+
+ /**
+ * Swaps the two specified elements in the specified array.
+ *
+ * @param array Data array
+ * @param i First index
+ * @param j Second index
+ */
+ private static void swap(int[] array, int i, int j) {
+ final int tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ /**
+ * Represents the position of a {@code double} value in a data array. The
+ * Comparable interface is implemented so Arrays.sort can be used to sort an
+ * array of data positions by value. Note that the implicitly defined natural
+ * ordering is NOT consistent with equals.
+ */
+ private static class DataPosition implements Comparable<DataPosition> {
+ /** Data value. */
+ private final double value;
+ /** Data position. */
+ private final int position;
+
+ /**
+ * Create an instance with the given value and position.
+ *
+ * @param value Data value.
+ * @param position Data position.
+ */
+ DataPosition(double value, int position) {
+ this.value = value;
+ this.position = position;
+ }
+
+ /**
+ * Compare this value to another.
+ * Only the <strong>values</strong> are compared.
+ *
+ * @param other the other pair to compare this to
+ * @return result of {@code Double.compare(value, other.value)}
+ */
+ @Override
+ public int compareTo(DataPosition other) {
+ return Double.compare(value, other.value);
+ }
+
+ // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
+
+ /**
+ * Returns the data position.
+ *
+ * @return position
+ */
+ int getPosition() {
+ return position;
+ }
+ }
+}
diff --git a/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/RankingAlgorithm.java b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/RankingAlgorithm.java
new file mode 100644
index 0000000..2cc2134
--- /dev/null
+++ b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/RankingAlgorithm.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.ranking;
+
+import java.util.function.UnaryOperator;
+
+/**
+ * Interface representing a rank transformation.
+ *
+ * @since 1.1
+ */
+@FunctionalInterface
+public interface RankingAlgorithm extends UnaryOperator<double[]> {
+ /**
+ * <p>Performs a rank transformation on the input data, returning an array of
+ * ranks.
+ *
+ * <p>Ranks should be 1-based: the smallest value returned in an array of ranks
+ * should be greater than or equal to one, rather than 0. Ranks should in
+ * general take integer values, though implementations may return averages or
+ * other floating point values to resolve ties in the input data.
+ *
+ * @param data Array of data to be ranked.
+ * @return an array of ranks corresponding to the elements of the input array
+ */
+ @Override
+ double[] apply(double[] data);
+}
diff --git a/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/TiesStrategy.java b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/TiesStrategy.java
new file mode 100644
index 0000000..02730c3
--- /dev/null
+++ b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/TiesStrategy.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.ranking;
+
+/**
+ * Strategies for handling tied values in rank transformations.
+ *
+ * @since 1.1
+ */
+public enum TiesStrategy {
+
+ /**
+ * Ties are assigned ranks in order of occurrence in the original array.
+ *
+ * <p>For example, {@code [1, 3, 4, 3]} is ranked as {@code [1, 2, 4, 3]}.
+ */
+ SEQUENTIAL,
+
+ /**
+ * Tied values are assigned the minimum applicable rank, or the rank of the
+ * first occurrence.
+ *
+ * <p>For example, {@code [1, 3, 4, 3]} is ranked as {@code [1, 2, 4, 2]}.
+ */
+ MINIMUM,
+
+ /**
+ * Tied values are assigned the maximum applicable rank, or the rank of the last
+ * occurrence. <p>For example, {@code [1, 3, 4, 3]} is ranked as {@code [1, 3, 4, 3]}.
+ */
+ MAXIMUM,
+
+ /**
+ * Tied values are assigned the average of the applicable ranks.
+ *
+ * <p>For example, {@code [1, 3, 4, 3]} is ranked as {@code [1, 2.5, 4, 2.5]}.
+ */
+ AVERAGE,
+
+ /**
+ * Tied values are assigned a <em>unique</em> random integral rank from among the
+ * applicable values.
+ *
+ * <p>For example, {@code [1, 3, 4, 3]} is ranked as either {@code [1, 2, 4, 3]} or
+ * {@code [1, 3, 4, 2]} where the choice is random.
+ *
+ * <p>The assigned rank will always be an integer, (inclusively) between the
+ * values returned by the {@link #MINIMUM} and {@link #MAXIMUM} strategies.
+ *
+ * <p>Use of a <em>unique</em> rank ensures that ties are resolved so that the
+ * rank order is stable.
+ */
+ RANDOM
+}
diff --git a/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/package-info.java b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/package-info.java
new file mode 100644
index 0000000..ff9a5b5
--- /dev/null
+++ b/commons-statistics-ranking/src/main/java/org/apache/commons/statistics/ranking/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes providing rank transformations.
+ */
+package org.apache.commons.statistics.ranking;
diff --git a/commons-statistics-ranking/src/site/resources/profile.jacoco b/commons-statistics-ranking/src/site/resources/profile.jacoco
new file mode 100644
index 0000000..a12755f
--- /dev/null
+++ b/commons-statistics-ranking/src/site/resources/profile.jacoco
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+#
+# Empty file used to automatically trigger JaCoCo profile from commons parent pom
diff --git a/commons-statistics-ranking/src/site/site.xml b/commons-statistics-ranking/src/site/site.xml
new file mode 100644
index 0000000..0ad859d
--- /dev/null
+++ b/commons-statistics-ranking/src/site/site.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="Statistics">
+ <bannerRight>
+ <name>Apache Commons Statistics</name>
+ <!-- Use a full URL allows a correct banner for the modules. -->
+ <src>https://commons.apache.org/proper/commons-statistics/images/commons_statistics.small.png</src>
+ <href>https://commons.apache.org/proper/commons-statistics/index.html</href>
+ </bannerRight>
+
+ <body>
+ <menu name="Statistics Ranking">
+ <item name="Overview" href="index.html"/>
+ <item name="Latest API docs (development)"
+ href="apidocs/index.html"/>
+ <!-- TODO: Uncomment for initial release
+ <item name="Javadoc (1.1 release)"
+ href="https://commons.apache.org/statistics/commons-statistics-ranking/javadocs/api-1.1/index.html"/>
+ -->
+ </menu>
+
+ </body>
+</project>
diff --git a/commons-statistics-ranking/src/site/xdoc/index.xml b/commons-statistics-ranking/src/site/xdoc/index.xml
new file mode 100644
index 0000000..0580e9e
--- /dev/null
+++ b/commons-statistics-ranking/src/site/xdoc/index.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<document>
+
+ <properties>
+ <title>Apache Commons Statistics Ranking</title>
+ </properties>
+
+ <body>
+
+ <section name="Apache Commons Statistics: Ranking" href="summary">
+ <p>
+ Apache Commons Statistics provides statistical rank transformations.
+ </p>
+
+ <p>
+ Example:
+ </p>
+
+<source class="prettyprint">import org.apache.commons.statistics.ranking.NaturalRanking;
+
+NaturalRanking ranking = new NaturalRanking();
+ranking.apply(new double[] {5, 6, 7, 8}); // 1, 2, 3, 4
+ranking.apply(new double[] {8, 5, 7, 6}); // 4, 1, 3, 2
+ranking.apply(new double[] {6, 5, 6, 7}); // 2.5, 1, 2.5, 4
+</source>
+
+ <p>
+ Browse the <a href="apidocs/index.html">Javadoc</a> for more information.
+ </p>
+ </section>
+
+ </body>
+
+</document>
diff --git a/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/NaturalRankingTest.java b/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/NaturalRankingTest.java
new file mode 100644
index 0000000..a0acb7c
--- /dev/null
+++ b/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/NaturalRankingTest.java
@@ -0,0 +1,611 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.ranking;
+
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+import java.util.function.LongSupplier;
+import java.util.stream.Collectors;
+import java.util.stream.DoubleStream;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.PermutationSampler;
+import org.apache.commons.rng.simple.RandomSource;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test cases for {@link NaturalRanking}.
+ */
+class NaturalRankingTest {
+
+ /** Examples data in the {@link NaturalRanking} class javadoc. */
+ private static final double[] EXAMPLE_DATA = {20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17};
+ private static final double[] TIES_FIRST = {0, 0, 2, 1, 4};
+ private static final double[] TIES_LAST = {4, 4, 1, 0};
+ private static final double[] MULTIPLE_NANS = {0, 1, Double.NaN, Double.NaN};
+ private static final double[] MULTIPLE_TIES = {3, 2, 5, 5, 6, 6, 1};
+ private static final double[] ALL_SAME = {0, 0, 0, 0};
+
+ /**
+ * Test the strategies are correctly configured using the various constructors.
+ */
+ @Test
+ void testProperties() {
+ final TiesStrategy defaultTs = TiesStrategy.AVERAGE;
+ final NaNStrategy defaultNs = NaNStrategy.FAILED;
+ final LongSupplier randomSource = null;
+ NaturalRanking ranking;
+
+ ranking = new NaturalRanking();
+ Assertions.assertEquals(defaultTs, ranking.getTiesStrategy());
+ Assertions.assertEquals(defaultNs, ranking.getNanStrategy());
+ ranking = new NaturalRanking(randomSource);
+ Assertions.assertEquals(TiesStrategy.RANDOM, ranking.getTiesStrategy());
+ Assertions.assertEquals(defaultNs, ranking.getNanStrategy());
+
+ final TiesStrategy[] ts = TiesStrategy.values();
+ final NaNStrategy[] ns = NaNStrategy.values();
+ for (final NaNStrategy n : ns) {
+ ranking = new NaturalRanking(n);
+ Assertions.assertEquals(defaultTs, ranking.getTiesStrategy());
+ Assertions.assertEquals(n, ranking.getNanStrategy());
+ ranking = new NaturalRanking(n, randomSource);
+ Assertions.assertEquals(TiesStrategy.RANDOM, ranking.getTiesStrategy());
+ Assertions.assertEquals(n, ranking.getNanStrategy());
+ for (final TiesStrategy t : ts) {
+ ranking = new NaturalRanking(n, t);
+ Assertions.assertEquals(t, ranking.getTiesStrategy());
+ Assertions.assertEquals(n, ranking.getNanStrategy());
+ }
+ }
+ for (final TiesStrategy t : ts) {
+ ranking = new NaturalRanking(t);
+ Assertions.assertEquals(t, ranking.getTiesStrategy());
+ Assertions.assertEquals(defaultNs, ranking.getNanStrategy());
+ }
+ }
+
+ @Test
+ void testNullStrategy() {
+ final double[] data = new double[5];
+ final NaturalRanking r1 = new NaturalRanking((NaNStrategy)null);
+ Assertions.assertThrows(NullPointerException.class, () -> r1.apply(data));
+ final NaturalRanking r2 = new NaturalRanking((TiesStrategy)null);
+ Assertions.assertThrows(NullPointerException.class, () -> r2.apply(data));
+ }
+
+ /**
+ * Test the ranks on the standard test cases.
+ *
+ * <p>If the expected result is null then the algorithm is expected to throw an
+ * {@link IllegalArgumentException}.
+ *
+ * @param ranking Ranking algorithm
+ * @param example Ranks for Ranks for the example data
+ * @param tiesFirst Ranks for the ties first data
+ * @param tiesLast Ranks for the ties last data
+ * @param multipleNaNs Ranks for the multiple NaNs data
+ * @param multipleTies Ranks for the multiple ties data
+ * @param allSame Ranks for the all same data
+ */
+ @ParameterizedTest
+ @MethodSource
+ void testRanks(RankingAlgorithm ranking, double[] example, double[] tiesFirst, double[] tiesLast,
+ double[] multipleNaNs, double[] multipleTies, double[] allSame) {
+ assertRanks(ranking, EXAMPLE_DATA, example, "Example data");
+ assertRanks(ranking, TIES_FIRST, tiesFirst, "Ties first");
+ assertRanks(ranking, TIES_LAST, tiesLast, "Ties last");
+ assertRanks(ranking, MULTIPLE_NANS, multipleNaNs, "Multiple NaNs");
+ assertRanks(ranking, MULTIPLE_TIES, multipleTies, "Multiple ties");
+ assertRanks(ranking, ALL_SAME, allSame, "All same");
+ }
+
+ /**
+ * Provide expected results for the standard test cases using different algorithms.
+ *
+ * @return the arguments
+ */
+ static Stream<Arguments> testRanks() {
+ final Stream.Builder<Arguments> builder = Stream.builder();
+ builder.add(Arguments.of(
+ // Default: Ties averaged, NaNs failed
+ new NaturalRanking(),
+ null,
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ null,
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.FAILED),
+ null,
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ null,
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE),
+ null,
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ null,
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(TiesStrategy.AVERAGE),
+ null,
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ null,
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.MAXIMAL, TiesStrategy.MINIMUM),
+ new double[] {5, 2, 6, 7, 2, 8, 9, 1, 2},
+ new double[] {1, 1, 4, 3, 5},
+ new double[] {3, 3, 2, 1},
+ new double[] {1, 2, 3, 3},
+ new double[] {3, 2, 4, 4, 6, 6, 1},
+ new double[] {1, 1, 1, 1}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.REMOVED, TiesStrategy.SEQUENTIAL),
+ new double[] {5, 2, 6, 7, 3, 8, 1, 4},
+ new double[] {1, 2, 4, 3, 5},
+ new double[] {3, 4, 2, 1},
+ new double[] {1, 2},
+ new double[] {3, 2, 4, 5, 6, 7, 1},
+ new double[] {1, 2, 3, 4}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.MINIMAL, TiesStrategy.MAXIMUM),
+ new double[] {6, 5, 7, 8, 5, 9, 2, 2, 5},
+ new double[] {2, 2, 4, 3, 5},
+ new double[] {4, 4, 2, 1},
+ new double[] {3, 4, 2, 2},
+ new double[] {3, 2, 5, 5, 7, 7, 1},
+ new double[] {4, 4, 4, 4}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.MINIMAL),
+ new double[] {6, 4, 7, 8, 4, 9, 1.5, 1.5, 4},
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ new double[] {3, 4, 1.5, 1.5},
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.FAILED),
+ null,
+ new double[] {1.5, 1.5, 4, 3, 5},
+ new double[] {3.5, 3.5, 2, 1},
+ null,
+ new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1},
+ new double[] {2.5, 2.5, 2.5, 2.5}
+ ));
+ return builder.build();
+ }
+
+ /**
+ * Test the ranks on the standard test cases. This method requires the output ranking
+ * to have unique indices using a natural sequence from one. The order within groups
+ * is arbitrary. All elements of each successive group must be ranked after the previous
+ * group.
+ *
+ * <p>If the expected result is null then the algorithm is expected to throw an
+ * {@link IllegalArgumentException}.
+ *
+ * @param ranking Ranking algorithm
+ * @param example Rank groups for Rank groups for the example data
+ * @param tiesFirst Rank groups for the ties first data
+ * @param tiesLast Rank groups for the ties last data
+ * @param multipleNaNs Rank groups for the multiple NaNs data
+ * @param multipleTies Rank groups for the multiple ties data
+ * @param allSame Rank groups for the all same data
+ */
+ @ParameterizedTest
+ @MethodSource
+ void testRanksTiesRandom(RankingAlgorithm ranking, int[] example, int[] tiesFirst, int[] tiesLast,
+ int[] multipleNaNs, int[] multipleTies, int[] allSame) {
+ assertRanks(ranking, EXAMPLE_DATA, example, "Example data");
+ assertRanks(ranking, TIES_FIRST, tiesFirst, "Ties first");
+ assertRanks(ranking, TIES_LAST, tiesLast, "Ties last");
+ assertRanks(ranking, MULTIPLE_NANS, multipleNaNs, "Multiple NaNs");
+ assertRanks(ranking, MULTIPLE_TIES, multipleTies, "Multiple ties");
+ assertRanks(ranking, ALL_SAME, allSame, "All same");
+ }
+
+ /**
+ * Provide expected results for the standard test cases using different algorithms.
+ *
+ * @return the arguments
+ */
+ static Stream<Arguments> testRanksTiesRandom() {
+ final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create();
+ final Stream.Builder<Arguments> builder = Stream.builder();
+ builder.add(Arguments.of(
+ new NaturalRanking(rng::nextLong),
+ null,
+ new int[] {1, 1, 3, 2, 4},
+ new int[] {3, 3, 2, 1},
+ null,
+ new int[] {3, 2, 4, 4, 5, 5, 1},
+ new int[] {1, 1, 1, 1}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.FIXED, rng::nextLong),
+ new int[] {3, 2, 4, 5, 2, 6, 0, 1, 2},
+ new int[] {1, 1, 3, 2, 4},
+ new int[] {3, 3, 2, 1},
+ new int[] {1, 2, 0, 0},
+ new int[] {3, 2, 4, 4, 5, 5, 1},
+ new int[] {1, 1, 1, 1}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.REMOVED, rng::nextLong),
+ new int[] {3, 2, 4, 5, 2, 6, -1, 1, 2},
+ new int[] {1, 1, 3, 2, 4},
+ new int[] {3, 3, 2, 1},
+ new int[] {1, 2, -1, -1},
+ new int[] {3, 2, 4, 4, 5, 5, 1},
+ new int[] {1, 1, 1, 1}
+ ));
+ // The test method works even when not using TiesStrategy.RANDOM but the
+ // ties strategy must output unique indices so use SEQUENTIAL.
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.MAXIMAL, TiesStrategy.SEQUENTIAL),
+ new int[] {5, 2, 6, 7, 3, 8, 9, 1, 4},
+ new int[] {1, 2, 4, 3, 5},
+ new int[] {3, 4, 2, 1},
+ new int[] {1, 2, 3, 4},
+ new int[] {3, 2, 4, 5, 6, 7, 1},
+ new int[] {1, 2, 3, 4}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.REMOVED, TiesStrategy.SEQUENTIAL),
+ new int[] {5, 2, 6, 7, 3, 8, -1, 1, 4},
+ new int[] {1, 2, 4, 3, 5},
+ new int[] {3, 4, 2, 1},
+ new int[] {1, 2, -1, -1},
+ new int[] {3, 2, 4, 5, 6, 7, 1},
+ new int[] {1, 2, 3, 4}
+ ));
+ builder.add(Arguments.of(
+ new NaturalRanking(NaNStrategy.MINIMAL, TiesStrategy.SEQUENTIAL),
+ new int[] {5, 2, 6, 7, 3, 8, 1, 1, 4},
+ new int[] {1, 2, 4, 3, 5},
+ new int[] {3, 4, 2, 1},
+ new int[] {2, 3, 1, 1},
+ new int[] {3, 2, 4, 5, 6, 7, 1},
+ new int[] {1, 2, 3, 4}
+ ));
+ return builder.build();
+ }
+
+ /**
+ * Test ranking of data with no ties. This should work for any ties strategy.
+ */
+ @ParameterizedTest
+ @EnumSource(value = TiesStrategy.class)
+ void testNoTies(TiesStrategy tiesStrategy) {
+ // Ordered values required for the test. These are randomized below.
+ final double[] values = {-13, -6, 1, 13.5, 66.9};
+ final int[] indices = PermutationSampler.natural(values.length);
+ final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create();
+ final double[] data = new double[values.length];
+ final double[] expected = new double[values.length];
+ final NaturalRanking ranking = new NaturalRanking(tiesStrategy);
+ for (int i = 0; i < 3; i++) {
+ PermutationSampler.shuffle(rng, indices);
+ for (int j = 0; j < values.length; j++) {
+ data[j] = values[indices[j]];
+ expected[j] = indices[j] + 1;
+ }
+ assertRanks(ranking, data, expected, tiesStrategy.toString());
+ }
+ }
+
+ /**
+ * Test ranking of data that contains NaN and positive and negative infinities.
+ */
+ @ParameterizedTest
+ @MethodSource
+ void testNaNsAndInfinities(NaNStrategy nanStrategy, double[] expected) {
+ final double[] data = {0, Double.POSITIVE_INFINITY, Double.NaN, Double.NEGATIVE_INFINITY};
+ assertRanks(new NaturalRanking(nanStrategy), data, expected, nanStrategy.toString());
+ }
+
+ static Stream<Arguments> testNaNsAndInfinities() {
+ final Stream.Builder<Arguments> builder = Stream.builder();
+ builder.add(Arguments.of(NaNStrategy.MAXIMAL, new double[] {2, 3.5, 3.5, 1}));
+ builder.add(Arguments.of(NaNStrategy.MINIMAL, new double[] {3, 4, 1.5, 1.5}));
+ builder.add(Arguments.of(NaNStrategy.REMOVED, new double[] {2, 3, 1}));
+ builder.add(Arguments.of(NaNStrategy.FIXED, new double[] {2, 3, Double.NaN, 1}));
+ builder.add(Arguments.of(NaNStrategy.FAILED, null));
+ return builder.build();
+ }
+
+ /**
+ * Test ranking of no data. This is enumerated for all NaN strategies to ensure
+ * all methods searching for NaN handle no data.
+ */
+ @ParameterizedTest
+ @EnumSource(value = NaNStrategy.class)
+ void testEmpty(NaNStrategy nanStrategy) {
+ final double[] data = {};
+ assertRanks(new NaturalRanking(nanStrategy), data, data, nanStrategy.toString());
+ }
+
+ /**
+ * Test ranking of only NaN data.
+ */
+ @ParameterizedTest
+ @MethodSource
+ void testNaN(NaNStrategy nanStrategy, double[] expected) {
+ final double[] data = {Double.NaN, Double.NaN};
+ assertRanks(new NaturalRanking(nanStrategy), data, expected, nanStrategy.toString());
+ }
+
+ static Stream<Arguments> testNaN() {
+ final Stream.Builder<Arguments> builder = Stream.builder();
+ builder.add(Arguments.of(NaNStrategy.MAXIMAL, new double[] {1.5, 1.5}));
+ builder.add(Arguments.of(NaNStrategy.MINIMAL, new double[] {1.5, 1.5}));
+ builder.add(Arguments.of(NaNStrategy.REMOVED, new double[0]));
+ builder.add(Arguments.of(NaNStrategy.FIXED, new double[] {Double.NaN, Double.NaN}));
+ builder.add(Arguments.of(NaNStrategy.FAILED, null));
+ return builder.build();
+ }
+
+ /**
+ * Test the random allocation of ties is uniform for each tied-position.
+ *
+ * @param before Number of values before the length of ties
+ * @param ties Number of tied values
+ * @param after Number of values after the length of ties
+ * @param seed Random seed (ensures test does not fail the build due to randomness)
+ */
+ @ParameterizedTest
+ @CsvSource({
+ "0, 10, 0, 23657426436",
+ "1, 8, 0, 21637427438",
+ "0, 6, 3, -9879847797",
+ "1, 12, 1, -253672793297",
+ })
+ void testRandom(int before, int ties, int after, long seed) {
+ Assertions.assertTrue(ties > 0);
+ final int n = 1000;
+ final DoubleStream.Builder builder = DoubleStream.builder();
+ final double[] value = {0};
+ IntStream.range(0, before).forEach(i -> builder.add(value[0]++));
+ IntStream.range(0, ties).forEach(i -> builder.add(value[0]));
+ IntStream.range(0, after).forEach(i -> builder.add(++value[0]));
+ final double[] data = builder.build().toArray();
+ final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create(seed);
+ final NaturalRanking ranking = new NaturalRanking(rng::nextLong);
+ final int k = before + 1;
+ final int m = before + ties;
+ // Frequency of ranks for each tied position in the data
+ final long[][] counts = new long[ties][ties];
+ for (int i = 0; i < n; i++) {
+ final double[] ranks = ranking.apply(data);
+ int j = 0;
+ for (; j < before; j++) {
+ Assertions.assertEquals(j + 1, ranks[j]);
+ }
+ for (; j < m; j++) {
+ counts[j - before][(int)ranks[j] - k]++;
+ }
+ for (; j < ranks.length; j++) {
+ Assertions.assertEquals(j + 1, ranks[j]);
+ }
+ }
+ final double p = new ChiSquareTest().chiSquareTest(counts);
+ Assertions.assertFalse(p < 1e-3, () -> "p-value too large: " + p);
+ }
+
+ /**
+ * Test random allocation of ties works without a supplied source of randomness.
+ */
+ @Test
+ void testDefaultRandom() {
+ // This is big enough the test should never fail to create 2 different results
+ final double[] data = new double[1000];
+ Arrays.fill(data, 1.23);
+ final NaturalRanking ranking = new NaturalRanking(TiesStrategy.RANDOM);
+ final double[] ranks1 = ranking.apply(data);
+ final double[] ranks2 = ranking.apply(data);
+ Assertions.assertFalse(Arrays.equals(ranks1, ranks2));
+ final double[] expected = IntStream.rangeClosed(1, data.length).asDoubleStream().toArray();
+ Arrays.sort(ranks1);
+ Arrays.sort(ranks2);
+ Assertions.assertArrayEquals(expected, ranks1);
+ Assertions.assertArrayEquals(expected, ranks2);
+ }
+
+ /**
+ * Assert the data ranks created by the algorithm are equal to the expected
+ * ranks. The input data is tested to ensure it is unchanged (i.e. the ranking
+ * does not destructively modify the input data). The expected ranks are passed
+ * through the algorithm and the result must be unchanged thus ensuring the
+ * algorithm is stable.
+ *
+ * @param ranking Ranking algorithm
+ * @param data Input data
+ * @param expected Expected ranks (if null the algorithm is expected to raise an {@link IllegalArgumentException})
+ * @param msg Prefix for any assertion failure message
+ */
+ private static void assertRanks(RankingAlgorithm ranking, double[] data, double[] expected, String msg) {
+ if (expected == null) {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> ranking.apply(data));
+ } else {
+ final double[] original = data.clone();
+ final double[] ranks = ranking.apply(data);
+ Assertions.assertArrayEquals(original, data, () -> msg + ": Data was destructively modified");
+ Assertions.assertArrayEquals(expected, ranks, () -> msg + ": Ranking failed");
+ final double[] ranks2 = ranking.apply(ranks);
+ Assertions.assertArrayEquals(ranks, ranks2, () -> msg + ": Repeat ranking changed the result");
+ }
+ }
+
+ /**
+ * Assert the data ranks created by the algorithm are assigned to the expected
+ * groups. The input data is tested to ensure it is unchanged (i.e. the ranking
+ * does not destructively modify the input data). The expected ranks are passed
+ * through the algorithm and the result must be unchanged thus ensuring the
+ * algorithm is stable.
+ *
+ * <p>Groups must use a sequential ordering from 1.
+ * Any negative expected group marks data to be removed (e.g. NaNs).
+ * Any group of zero marks data to be left unchanged (e.g. NaNs).
+ * Note that the current test assumes the removed and unchanged options are mutually exclusive.
+ * Groups are mapped to an expected rank using a sequential ordering from 1.
+ * The order within a group can be random.
+ *
+ * For example:
+ * <pre>
+ * groups: [1, 2, 2, 2, 3]
+ *
+ * [1, 2, 3, 4, 5] : pass
+ * [1, 4, 3, 2, 5] : pass
+ * [1, 2, 4, 3, 5] : pass
+ * [1, 3, 3, 3, 5] : fail
+ * [1, 2, 2, 4, 5] : fail
+ * [1, 2, 3, 4, 4] : fail
+ * [1, 2, 3, 4, 6] : fail
+ * </pre>
+ *
+ * @param ranking Ranking algorithm
+ * @param data Input data
+ * @param expectedGroups Expected groups (if null the algorithm is expected to raise an {@link IllegalArgumentException})
+ * @param msg Prefix for any assertion failure message
+ */
+ private static void assertRanks(RankingAlgorithm ranking, double[] data, int[] expectedGroup, String msg) {
+ if (expectedGroup == null) {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> ranking.apply(data));
+ } else {
+ Assertions.assertEquals(data.length, expectedGroup.length, "Groups must be assigned to all data");
+
+ final double[] original = data.clone();
+ final double[] ranks = ranking.apply(data);
+ Assertions.assertArrayEquals(original, data, () -> msg + ": Data was destructively modified");
+ final double[] ranks2 = ranking.apply(ranks);
+ Assertions.assertArrayEquals(ranks, ranks2, () -> msg + ": Repeat ranking changed the result");
+
+ // TODO: Simplify the collation of groups into sets.
+
+ // Check groups
+ int max = 0;
+ int numberOfElements = 0;
+ int unchanged = 0;
+ int removed = 0;
+ for (int i = 0; i < expectedGroup.length; i++) {
+ if (expectedGroup[i] > 0) {
+ max = Math.max(max, expectedGroup[i]);
+ // Reduce to only the expected elements.
+ // This filters unchanged/removed elements.
+ expectedGroup[numberOfElements] = expectedGroup[i];
+ numberOfElements++;
+ } else if (expectedGroup[i] == 0) {
+ Assertions.assertEquals(data[i], ranks[i], "Element was changed");
+ // Flag for removal
+ ranks[i] = -1;
+ unchanged++;
+ } else {
+ removed++;
+ }
+ }
+ Assertions.assertTrue(removed == 0 || unchanged == 0, "removed and unchanged options are mutually exclusive");
+ Assertions.assertEquals(ranks.length, data.length - removed, "Did not remove expected number of elements");
+ Assertions.assertTrue(max <= numberOfElements, "Maximum group above the number of valid elements");
+
+ if (unchanged != 0) {
+ // Unchanged elements are not removed by the ranking algorithm.
+ // These have already been asserted as unchanged so we remove them
+ // so only the grouped elements remain.
+ int j = 0;
+ for (int i = 0; i < ranks.length; i++) {
+ if (ranks[i] < 0) {
+ continue;
+ }
+ ranks[j++] = ranks[i];
+ }
+ Assertions.assertEquals(numberOfElements, j, "Error removing unchanged elements");
+ }
+
+ // Count groups sizes
+ final int[] sizes = new int[max + 1];
+ for (int i = 0; i < numberOfElements; i++) {
+ sizes[expectedGroup[i]]++;
+ }
+ // Each must be non-zero
+ for (int i = 1; i <= max; i++) {
+ final int index = i;
+ Assertions.assertNotEquals(0, sizes[i], () -> "Empty group: " + index);
+ }
+
+ // Here we have a number of groups with non-zero sizes.
+ // The expected ranks must be sequential from 1.
+ // Create a BitSet for each group filled with bits enabled for each rank
+ // within the group. This is typically a single value or a range: [1], [2, 3], [4], etc.
+ // Store the set in an array using the rank as the index to allow look-up of
+ // the set from the rank.
+ final int[] rankIndex = {1};
+ final BitSet[] rankToGroup = new BitSet[data.length + 1];
+ final List<BitSet> groups =
+ IntStream.rangeClosed(1, max)
+ .mapToObj(i -> {
+ final BitSet bs = new BitSet();
+ for (int j = 0; j < sizes[i]; j++) {
+ bs.set(rankIndex[0]);
+ rankToGroup[rankIndex[0]] = bs;
+ rankIndex[0]++;
+ }
+ return bs;
+ }).collect(Collectors.toList());
+
+ // For the actual rank, map back to the group and check it is allowed.
+ for (int i = 0; i < numberOfElements; i++) {
+ final double r = ranks[i];
+ Assertions.assertTrue((int) r == r, "Non-integer rank: " + r);
+ final int rank = (int) r;
+ final BitSet groupSet = rankToGroup[rank];
+ final int group = expectedGroup[i];
+ Assertions.assertTrue(groupSet.get(rank),
+ () -> String.format("Unexpected rank %d in group %d", rank, group));
+ groupSet.clear(rank);
+ }
+
+ // Check all groups should now be empty
+ groups.forEach(g -> Assertions.assertEquals(0, g.cardinality(), "Non-empty group"));
+ }
+ }
+}
diff --git a/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/UserGuideTest.java b/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/UserGuideTest.java
new file mode 100644
index 0000000..03c43d1
--- /dev/null
+++ b/commons-statistics-ranking/src/test/java/org/apache/commons/statistics/ranking/UserGuideTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.ranking;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Test code used in the ranking section of the user guide.
+ */
+class UserGuideTest {
+ @Test
+ void testRanking() {
+ NaturalRanking ranking = new NaturalRanking();
+ Assertions.assertArrayEquals(new double[] {1, 2, 3, 4}, ranking.apply(new double[] {5, 6, 7, 8}));
+ Assertions.assertArrayEquals(new double[] {4, 1, 3, 2}, ranking.apply(new double[] {8, 5, 7, 6}));
+ Assertions.assertArrayEquals(new double[] {2.5, 1, 2.5, 4}, ranking.apply(new double[] {6, 5, 6, 7}));
+ }
+}
diff --git a/pom.xml b/pom.xml
index 6abb936..677f932 100644
--- a/pom.xml
+++ b/pom.xml
@@ -623,6 +623,7 @@ This is avoided by creating an empty directory when svn is not available.
<modules>
<module>commons-statistics-distribution</module>
+ <module>commons-statistics-ranking</module>
<module>commons-statistics-regression</module>
</modules>
diff --git a/src/conf/pmd/pmd-ruleset.xml b/src/conf/pmd/pmd-ruleset.xml
index 16c2da5..ce00cc4 100644
--- a/src/conf/pmd/pmd-ruleset.xml
+++ b/src/conf/pmd/pmd-ruleset.xml
@@ -124,6 +124,12 @@
value="./ancestor-or-self::ClassOrInterfaceDeclaration[@SimpleName='AbstractContinuousDistribution']"/>
</properties>
</rule>
+ <rule ref="category/java/design.xml/GodClass">
+ <properties>
+ <property name="violationSuppressXPath"
+ value="./ancestor-or-self::ClassOrInterfaceDeclaration[@SimpleName='NaturalRanking']"/>
+ </properties>
+ </rule>
<rule ref="category/java/errorprone.xml/AvoidLiteralsInIfCondition">
<properties>
@@ -132,4 +138,12 @@
</properties>
</rule>
+ <rule ref="category/java/errorprone.xml/AvoidFieldNameMatchingMethodName">
+ <properties>
+ <!-- size() method return the list size. -->
+ <property name="violationSuppressXPath"
+ value="./ancestor-or-self::ClassOrInterfaceDeclaration[@SimpleName='IntList']"/>
+ </properties>
+ </rule>
+
</ruleset>
diff --git a/src/conf/spotbugs/spotbugs-exclude-filter.xml b/src/conf/spotbugs/spotbugs-exclude-filter.xml
index 629ddcb..629481e 100644
--- a/src/conf/spotbugs/spotbugs-exclude-filter.xml
+++ b/src/conf/spotbugs/spotbugs-exclude-filter.xml
@@ -62,4 +62,9 @@
<Bug pattern="FL_FLOATS_AS_LOOP_COUNTERS" />
</Match>
+ <Match>
+ <Class name="org.apache.commons.statistics.ranking.NaturalRanking$DataPosition" />
+ <Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
+ </Match>
+
</FindBugsFilter>