You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2015/11/10 04:25:34 UTC
[math] Added constructors taking sample data as arguments to
enumerated real and integer distributions. JIRA: MATH-1287.
Repository: commons-math
Updated Branches:
refs/heads/master fd37b5dd0 -> 5a1b473d5
Added constructors taking sample data as arguments to enumerated real and integer distributions. JIRA: MATH-1287.
Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/5a1b473d
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/5a1b473d
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/5a1b473d
Branch: refs/heads/master
Commit: 5a1b473d5c57092e7f77ca008b1085c512577ce7
Parents: fd37b5d
Author: Phil Steitz <ph...@gmail.com>
Authored: Mon Nov 9 20:25:22 2015 -0700
Committer: Phil Steitz <ph...@gmail.com>
Committed: Mon Nov 9 20:25:22 2015 -0700
----------------------------------------------------------------------
src/changes/changes.xml | 3 +
.../EnumeratedIntegerDistribution.java | 61 +++++++++++++++++-
.../EnumeratedRealDistribution.java | 67 ++++++++++++++++++--
.../EnumeratedIntegerDistributionTest.java | 10 +++
.../EnumeratedRealDistributionTest.java | 8 +++
5 files changed, 144 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 9c20d22..af3d402 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,6 +54,9 @@ If the output is not quite correct, check for invisible trailing spaces!
</release>
<release version="4.0" date="XXXX-XX-XX" description="">
+ <action dev="psteitz" type="update" issue="MATH-1287">
+ Added constructors taking sample data as arguments to enumerated real and integer distributions.
+ </action>
<action dev="oertl" type="fix" issue="MATH-1269"> <!-- backported to 3.6 -->
Fixed FastMath.exp that potentially returned NaN for non-NaN argument.
</action>
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java b/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
index 623e2cc..8cdf8e1 100644
--- a/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
@@ -17,7 +17,10 @@
package org.apache.commons.math4.distribution;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.MathArithmeticException;
@@ -94,6 +97,62 @@ public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {
throws DimensionMismatchException, NotPositiveException, MathArithmeticException,
NotFiniteNumberException, NotANumberException {
super(rng);
+ innerDistribution = new EnumeratedDistribution<Integer>(
+ rng, createDistribution(singletons, probabilities));
+ }
+
+ /**
+ * Create a discrete integer-valued distribution from the input data. Each distinct value
+ * is assigned mass equal to its number of occurrences divided by {@code data.length}.
+ *
+ * @param rng random number generator used for sampling
+ * @param data input dataset
+ */
+ public EnumeratedIntegerDistribution(final RandomGenerator rng, final int[] data) {
+ super(rng);
+ final Map<Integer, Integer> dataMap = new HashMap<Integer, Integer>();
+
+ // Tally occurrences; Integer.valueOf avoids the needless allocation of new Integer(int).
+ for (int value : data) {
+ final Integer count = dataMap.get(value);
+ if (count == null) {
+ dataMap.put(value, Integer.valueOf(1));
+ } else {
+ dataMap.put(value, Integer.valueOf(count.intValue() + 1));
+ }
+ }
+ final int massPoints = dataMap.size();
+ final double denom = data.length; // double, so the division below is not integer division
+ final int[] values = new int[massPoints];
+ final double[] probabilities = new double[massPoints];
+ int index = 0;
+ for (Entry<Integer, Integer> entry : dataMap.entrySet()) {
+ values[index] = entry.getKey();
+ probabilities[index] = entry.getValue().intValue() / denom;
+ index++;
+ }
+ innerDistribution = new EnumeratedDistribution<Integer>(rng, createDistribution(values, probabilities));
+ }
+
+ /**
+ * Create a discrete integer-valued distribution from the input data, sampling with a new
+ * {@link Well19937c} generator. Values are assigned mass based on their frequency: e.g.
+ * [0,1,1,2] yields values 0, 1 and 2 with probability masses 0.25, 0.5 and 0.25 respectively.
+ *
+ * @param data input dataset
+ */
+ public EnumeratedIntegerDistribution(final int[] data) {
+ this(new Well19937c(), data);
+ }
+
+ /**
+ * Create the list of Pairs representing the distribution from singletons and probabilities.
+ *
+ * @param singletons values
+ * @param probabilities probabilities
+ * @return list of value/probability pairs
+ */
+ private List<Pair<Integer, Double>> createDistribution(int[] singletons, double[] probabilities) {
if (singletons.length != probabilities.length) {
throw new DimensionMismatchException(probabilities.length, singletons.length);
}
@@ -103,8 +162,8 @@ public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {
for (int i = 0; i < singletons.length; i++) {
samples.add(new Pair<Integer, Double>(singletons[i], probabilities[i]));
}
+ return samples;
- innerDistribution = new EnumeratedDistribution<Integer>(rng, samples);
}
/**
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java b/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
index 7702cc9..c3e7701 100644
--- a/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
@@ -17,7 +17,10 @@
package org.apache.commons.math4.distribution;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.MathArithmeticException;
@@ -51,7 +54,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
protected final EnumeratedDistribution<Double> innerDistribution;
/**
- * Create a discrete distribution using the given probability mass function
+ * Create a discrete real-valued distribution using the given probability mass function
* enumeration.
* <p>
* <b>Note:</b> this constructor will implicitly create an instance of
@@ -77,7 +80,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
}
/**
- * Create a discrete distribution using the given random number generator
+ * Create a discrete real-valued distribution using the given random number generator
* and probability mass function enumeration.
*
* @param rng random number generator.
@@ -95,17 +98,73 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
throws DimensionMismatchException, NotPositiveException, MathArithmeticException,
NotFiniteNumberException, NotANumberException {
super(rng);
+
+ innerDistribution = new EnumeratedDistribution<Double>(
+ rng, createDistribution(singletons, probabilities));
+ }
+
+ /**
+ * Create a discrete real-valued distribution from the input data. Each distinct value is
+ * assigned mass equal to its number of occurrences divided by {@code data.length}.
+ *
+ * @param rng random number generator used for sampling
+ * @param data input dataset
+ */
+ public EnumeratedRealDistribution(final RandomGenerator rng, final double[] data) {
+ super(rng);
+ final Map<Double, Integer> dataMap = new HashMap<Double, Integer>();
+ // Keys are boxed Doubles compared via Double.equals, so 0.0 and -0.0 are distinct values.
+ // Tally occurrences; Integer.valueOf avoids the needless allocation of new Integer(int).
+ for (double value : data) {
+ final Integer count = dataMap.get(value);
+ if (count == null) {
+ dataMap.put(value, Integer.valueOf(1));
+ } else {
+ dataMap.put(value, Integer.valueOf(count.intValue() + 1));
+ }
+ }
+ final int massPoints = dataMap.size();
+ final double denom = data.length; // double, so the division below is not integer division
+ final double[] values = new double[massPoints];
+ final double[] probabilities = new double[massPoints];
+ int index = 0;
+ for (Entry<Double, Integer> entry : dataMap.entrySet()) {
+ values[index] = entry.getKey();
+ probabilities[index] = entry.getValue().intValue() / denom;
+ index++;
+ }
+ innerDistribution = new EnumeratedDistribution<Double>(rng, createDistribution(values, probabilities));
+ }
+
+ /**
+ * Create a discrete real-valued distribution from the input data, sampling with a new
+ * {@link Well19937c} generator. Values are assigned mass based on their frequency: e.g.
+ * [0,1,1,2] yields values 0, 1 and 2 with probability masses 0.25, 0.5 and 0.25 respectively.
+ *
+ * @param data input dataset
+ */
+ public EnumeratedRealDistribution(final double[] data) {
+ this(new Well19937c(), data);
+ }
+ /**
+ * Create the list of Pairs representing the distribution from singletons and probabilities.
+ *
+ * @param singletons values
+ * @param probabilities probabilities
+ * @return list of value/probability pairs
+ */
+ private List<Pair<Double, Double>> createDistribution(double[] singletons, double[] probabilities) {
if (singletons.length != probabilities.length) {
throw new DimensionMismatchException(probabilities.length, singletons.length);
}
- List<Pair<Double, Double>> samples = new ArrayList<Pair<Double, Double>>(singletons.length);
+ final List<Pair<Double, Double>> samples = new ArrayList<Pair<Double, Double>>(singletons.length);
for (int i = 0; i < singletons.length; i++) {
samples.add(new Pair<Double, Double>(singletons[i], probabilities[i]));
}
+ return samples;
- innerDistribution = new EnumeratedDistribution<Double>(rng, samples);
}
/**
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
index 48be05e..cd7bb4d 100644
--- a/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
@@ -16,6 +16,8 @@
*/
package org.apache.commons.math4.distribution;
+import static org.junit.Assert.assertEquals;
+
import org.apache.commons.math4.distribution.EnumeratedIntegerDistribution;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.MathArithmeticException;
@@ -169,4 +171,12 @@ public class EnumeratedIntegerDistributionTest {
Assert.assertEquals(testDistribution.getNumericalVariance(),
sumOfSquares / n - FastMath.pow(sum / n, 2), 1e-2);
}
+
+ @Test
+ public void testCreateFromIntegers() {
+ final int[] sample = {0, 1, 1, 2, 2, 2};
+ final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(sample);
+ assertEquals(0.5, dist.probability(2), 0); // 2 occurs 3 times out of 6
+ assertEquals(0.5, dist.cumulativeProbability(1), 0); // 0 once plus 1 twice, out of 6
+ }
}
http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
index 9a02045..6b98c2d 100644
--- a/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
@@ -240,4 +240,12 @@ public class EnumeratedRealDistributionTest {
assertEquals(18.0, distribution.inverseCumulativeProbability(0.5625), 0.0);
assertEquals(28.0, distribution.inverseCumulativeProbability(0.7500), 0.0);
}
+
+ @Test
+ public void testCreateFromDoubles() {
+ final double[] sample = {0, 1, 1, 2, 2, 2};
+ final EnumeratedRealDistribution dist = new EnumeratedRealDistribution(sample);
+ assertEquals(0.5, dist.probability(2), 0); // 2 occurs 3 times out of 6
+ assertEquals(0.5, dist.cumulativeProbability(1), 0); // 0 once plus 1 twice, out of 6
+ }
}