You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by er...@apache.org on 2021/09/26 23:08:47 UTC

[commons-math] branch master updated (acfc270 -> 3a5cf27)

This is an automated email from the ASF dual-hosted git repository.

erans pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-math.git.


    from acfc270  Upgrade dependency.
     new b44c261  Javadoc.
     new d4de59a  MATH-1462: Reproduce the infinite quantile bug
     new 3a5cf27  MATH-1462: Extend workaround for bins that contain up to 3 values.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../legacy/distribution/EmpiricalDistribution.java | 39 +++++++++-------------
 .../distribution/EmpiricalDistributionTest.java    | 23 +++++++++++++
 src/changes/changes.xml                            |  3 ++
 3 files changed, 42 insertions(+), 23 deletions(-)

[commons-math] 03/03: MATH-1462: Extend workaround for bins that contain up to 3 values.

Posted by er...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

erans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-math.git

commit 3a5cf279bbeed85a169ba4480b6dea10cb7a7beb
Author: Gilles Sadowski <gi...@gmail.com>
AuthorDate: Mon Sep 27 01:02:54 2021 +0200

    MATH-1462: Extend workaround for bins that contain up to 3 values.
---
 .../math4/legacy/distribution/EmpiricalDistribution.java    |  5 +++--
 .../legacy/distribution/EmpiricalDistributionTest.java      | 13 ++++++-------
 src/changes/changes.xml                                     |  3 +++
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
index 7929378..cca2c19 100644
--- a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
+++ b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
@@ -409,7 +409,8 @@ public final class EmpiricalDistribution extends AbstractRealDistribution
             ++i;
         }
 
-        final ContinuousDistribution kernel = getKernel(binStats.get(i));
+        final SummaryStatistics stats = binStats.get(i);
+        final ContinuousDistribution kernel = getKernel(stats);
         final double kB = kB(i);
         final double[] binBounds = getUpperBounds();
         final double lower = i == 0 ? min : binBounds[i - 1];
@@ -546,7 +547,7 @@ public final class EmpiricalDistribution extends AbstractRealDistribution
      */
     private static Function<SummaryStatistics, ContinuousDistribution> defaultKernel() {
         return stats -> {
-            if (stats.getN() <= 1 ||
+            if (stats.getN() <= 3 ||
                 stats.getVariance() == 0) {
                 return new ConstantContinuousDistribution(stats.getMean());
             } else {
diff --git a/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java b/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
index 4f26e4e..addd94a 100644
--- a/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
+++ b/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
@@ -489,13 +489,12 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
             6212, 5961, 711
         };
 
-        final EmpiricalDistribution ed = EmpiricalDistribution.from(1000, data);
+        final double p = 0.32;
+        for (int i = 745; i <= 1100; i++) {
+            final EmpiricalDistribution ed = EmpiricalDistribution.from(i, data);
+            final double v = ed.inverseCumulativeProbability(p);
 
-        double v;
-        double p;
-
-        p = 0.32;
-        v = ed.inverseCumulativeProbability(p);
-        Assert.assertTrue("p=" + p + " => v=" + v, Double.isFinite(v));
+            Assert.assertTrue("p=" + p + " => v=" + v, Double.isFinite(v));
+        }
     }
 }
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 595ebe8..5c276ce 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -97,6 +97,9 @@ Caveat:
  nightmare was one of the main reasons for creating more focused
  components.]
 ">
+      <action dev="erans" type="fix" issue="MATH-1462">
+        "EmpiricalDistribution": Use constant kernel for bins that contain up to 3 values.
+      </action>
       <action dev="aherbert" due-to="Arturo Bernal" type="update">
         Simplify assertions with simpler equivalent.
       </action>

[commons-math] 01/03: Javadoc.

Posted by er...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

erans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-math.git

commit b44c2610e0991cbe155d647065abf11d3d39fa8d
Author: Gilles Sadowski <gi...@gmail.com>
AuthorDate: Sat Sep 18 03:05:51 2021 +0200

    Javadoc.
---
 .../legacy/distribution/EmpiricalDistribution.java | 34 +++++++++-------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
index 0c72c0a..7929378 100644
--- a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
+++ b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
@@ -34,25 +34,25 @@ import org.apache.commons.math4.legacy.core.jdkmath.AccurateMath;
 /**
  * <p>Represents an <a href="http://en.wikipedia.org/wiki/Empirical_distribution_function">
  * empirical probability distribution</a>: Probability distribution derived
- * from observed data without making any assumptions about the functional form
- * of the population distribution that the data come from.</p>
+ * from observed data without making any assumptions about the functional
+ * form of the population distribution that the data come from.</p>
  *
  * <p>An {@code EmpiricalDistribution} maintains data structures called
  * <i>distribution digests</i> that describe empirical distributions and
  * support the following operations:
  * <ul>
- *  <li>loading the distribution from a file of observed data values</li>
- *  <li>dividing the input data into "bin ranges" and reporting bin frequency
- *      counts (data for histogram)</li>
- *  <li>reporting univariate statistics describing the full set of data values
- *      as well as the observations within each bin</li>
+ *  <li>loading the distribution from "observed" data values</li>
+ *  <li>dividing the input data into "bin ranges" and reporting bin
+ *      frequency counts (data for histogram)</li>
+ *  <li>reporting univariate statistics describing the full set of data
+ *      values as well as the observations within each bin</li>
  *  <li>generating random values from the distribution</li>
  * </ul>
  *
  * Applications can use {@code EmpiricalDistribution} to build grouped
- * frequency histograms representing the input data or to generate random values
- * "like" those in the input file, i.e. the values generated will follow the
- * distribution of the values in the file.
+ * frequency histograms representing the input data or to generate random
+ * values "like" those in the input, i.e. the values generated will follow
+ * the distribution of the input values.
  *
  * <p>The implementation uses what amounts to the
  * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
@@ -84,16 +84,8 @@ import org.apache.commons.math4.legacy.core.jdkmath.AccurateMath;
  * grouped frequency distribution at the bin endpoints and interpolates within
  * bins using within-bin kernels.</p>
  *
- * <strong>USAGE NOTES:</strong>
- * <ul>
- *  <li>
- *   The {@code binCount} is set by default to 1000.  A good rule of thumb
- *   is to set the bin count to approximately the length of the input file
- *   divided by 10. </li>
- *  <li>
- *   The input file <i>must</i> be a plain text file containing one valid
- *   numeric entry per line.</li>
- * </ul>
+ * <strong>CAVEAT</strong>: It is advised that the {@link #from(int,double[])
+ * bin count} be about one tenth of the size of the input array.
  */
 public final class EmpiricalDistribution extends AbstractRealDistribution
     implements ContinuousDistribution {
@@ -547,7 +539,7 @@ public final class EmpiricalDistribution extends AbstractRealDistribution
 
     /**
      * The within-bin smoothing kernel: A Gaussian distribution
-     * (unless the bin contains only one observation, in which case
+     * (unless the bin contains at most one observation, in which case
      * a constant distribution is returned).
      *
      * @return the within-bin kernel factory.

[commons-math] 02/03: MATH-1462: Reproduce the infinite quantile bug

Posted by er...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

erans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-math.git

commit d4de59a15a73e585ae74481bd1fb1265286d3955
Author: Maciej Kwidzinski <mk...@atlassian.com>
AuthorDate: Fri Sep 17 13:58:19 2021 +0200

    MATH-1462: Reproduce the infinite quantile bug
---
 .../distribution/EmpiricalDistributionTest.java    | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java b/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
index b51a8ec..4f26e4e 100644
--- a/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
+++ b/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
@@ -474,4 +474,28 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
         v = ed.inverseCumulativeProbability(p);
         Assert.assertTrue("p=" + p + " => v=" + v, v < 6350);
     }
+
+    @Test
+    public void testMath1462InfiniteQuantile() {
+        final double[] data = {
+            18054, 17548, 17350, 17860, 17827, 17653, 18113, 18405, 17746,
+            17647, 18160, 17955, 17705, 17890, 17974, 17857, 13287, 18645,
+            17775, 17730, 17996, 18263, 17861, 17161, 17717, 18134, 18669,
+            18340, 17221, 18292, 18146, 17520, 18207, 17829, 18206, 13301,
+            18257, 17626, 18358, 18340, 18320, 17852, 17804, 17577, 17718,
+            18099, 13395, 17763, 17911, 17978, 12935, 17519, 17550, 18728,
+            18518, 17698, 18739, 18553, 17982, 18113, 17974, 17961, 17645,
+            17867, 17890, 17498, 18718, 18191, 18177, 17923, 18164, 18155,
+            6212, 5961, 711
+        };
+
+        final EmpiricalDistribution ed = EmpiricalDistribution.from(1000, data);
+
+        double v;
+        double p;
+
+        p = 0.32;
+        v = ed.inverseCumulativeProbability(p);
+        Assert.assertTrue("p=" + p + " => v=" + v, Double.isFinite(v));
+    }
 }