You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ps...@apache.org on 2015/11/10 04:25:34 UTC

[math] Added constructors taking sample data as arguments to enumerated real and integer distributions. JIRA: MATH-1287.

Repository: commons-math
Updated Branches:
  refs/heads/master fd37b5dd0 -> 5a1b473d5


Added constructors taking sample data as arguments to enumerated real and integer distributions. JIRA: MATH-1287.


Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/5a1b473d
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/5a1b473d
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/5a1b473d

Branch: refs/heads/master
Commit: 5a1b473d5c57092e7f77ca008b1085c512577ce7
Parents: fd37b5d
Author: Phil Steitz <ph...@gmail.com>
Authored: Mon Nov 9 20:25:22 2015 -0700
Committer: Phil Steitz <ph...@gmail.com>
Committed: Mon Nov 9 20:25:22 2015 -0700

----------------------------------------------------------------------
 src/changes/changes.xml                         |  3 +
 .../EnumeratedIntegerDistribution.java          | 61 +++++++++++++++++-
 .../EnumeratedRealDistribution.java             | 67 ++++++++++++++++++--
 .../EnumeratedIntegerDistributionTest.java      | 10 +++
 .../EnumeratedRealDistributionTest.java         |  8 +++
 5 files changed, 144 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 9c20d22..af3d402 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,6 +54,9 @@ If the output is not quite correct, check for invisible trailing spaces!
     </release>
 
     <release version="4.0" date="XXXX-XX-XX" description="">
+      <action dev="psteitz" type="update" issue="MATH-1287">
+        Added constructors taking sample data as arguments to enumerated real and integer distributions.
+      </action>
       <action dev="oertl" type="fix" issue="MATH-1269"> <!-- backported to 3.6 -->
         Fixed FastMath.exp that potentially returned NaN for non-NaN argument.
       </action>

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java b/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
index 623e2cc..8cdf8e1 100644
--- a/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistribution.java
@@ -17,7 +17,10 @@
 package org.apache.commons.math4.distribution;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.math4.exception.DimensionMismatchException;
 import org.apache.commons.math4.exception.MathArithmeticException;
@@ -94,6 +97,62 @@ public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {
         throws DimensionMismatchException, NotPositiveException, MathArithmeticException,
                 NotFiniteNumberException, NotANumberException {
         super(rng);
+        innerDistribution = new EnumeratedDistribution<Integer>(
+                rng, createDistribution(singletons, probabilities));
+    }
+
+    /**
+     * Create a discrete integer-valued distribution from the input data.  Each distinct
+     * value in {@code data} becomes a mass point whose probability is the number of
+     * times it occurs divided by {@code data.length}.
+     *
+     * @param rng random number generator used for sampling
+     * @param data input dataset
+     */
+    public EnumeratedIntegerDistribution(final RandomGenerator rng, final int[] data) {
+        super(rng);
+        // Tally the occurrences of each distinct value.
+        final Map<Integer, Integer> dataMap = new HashMap<Integer, Integer>();
+        for (int value : data) {
+            final Integer count = dataMap.get(value);
+            // Boxing goes through Integer.valueOf, which may reuse cached instances
+            // instead of allocating a new Integer for every sample.
+            dataMap.put(value, count == null ? 1 : count + 1);
+        }
+
+        // Convert the counts to parallel arrays of values and relative frequencies.
+        final int massPoints = dataMap.size();
+        final double denom = data.length; // double so the division below is not truncated
+        final int[] values = new int[massPoints];
+        final double[] probabilities = new double[massPoints];
+        int index = 0;
+        for (Entry<Integer, Integer> entry : dataMap.entrySet()) {
+            values[index] = entry.getKey();
+            probabilities[index] = entry.getValue().intValue() / denom;
+            index++;
+        }
+        innerDistribution = new EnumeratedDistribution<Integer>(rng, createDistribution(values, probabilities));
+    }
+
+    /**
+     * Create a discrete integer-valued distribution from the input data.  Values are assigned
+     * mass based on their frequency.  For example, [0,1,1,2] as input creates a distribution
+     * with values 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively.
+     * A new {@link Well19937c} instance is created as the random generator used for sampling.
+     * @param data input dataset
+     */
+    public EnumeratedIntegerDistribution(final int[] data) {
+        this(new Well19937c(), data);
+    }
+
+    /**
+     * Create the list of Pairs representing the distribution from singletons and probabilities.
+     *
+     * @param singletons values
+     * @param probabilities probabilities
+     * @return list of value/probability pairs
+     */
+    private  List<Pair<Integer, Double>>  createDistribution(int[] singletons, double[] probabilities) {
         if (singletons.length != probabilities.length) {
             throw new DimensionMismatchException(probabilities.length, singletons.length);
         }
@@ -103,8 +162,8 @@ public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {
         for (int i = 0; i < singletons.length; i++) {
             samples.add(new Pair<Integer, Double>(singletons[i], probabilities[i]));
         }
+        return samples;
 
-        innerDistribution = new EnumeratedDistribution<Integer>(rng, samples);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java b/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
index 7702cc9..c3e7701 100644
--- a/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/EnumeratedRealDistribution.java
@@ -17,7 +17,10 @@
 package org.apache.commons.math4.distribution;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.math4.exception.DimensionMismatchException;
 import org.apache.commons.math4.exception.MathArithmeticException;
@@ -51,7 +54,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
     protected final EnumeratedDistribution<Double> innerDistribution;
 
     /**
-     * Create a discrete distribution using the given probability mass function
+     * Create a discrete real-valued distribution using the given probability mass function
      * enumeration.
      * <p>
      * <b>Note:</b> this constructor will implicitly create an instance of
@@ -77,7 +80,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
     }
 
     /**
-     * Create a discrete distribution using the given random number generator
+     * Create a discrete real-valued distribution using the given random number generator
      * and probability mass function enumeration.
      *
      * @param rng random number generator.
@@ -95,17 +98,73 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
         throws DimensionMismatchException, NotPositiveException, MathArithmeticException,
                NotFiniteNumberException, NotANumberException {
         super(rng);
+
+        innerDistribution = new EnumeratedDistribution<Double>(
+                rng, createDistribution(singletons, probabilities));
+    }
+
+    /**
+     * Create a discrete real-valued distribution from the input data.  Each distinct
+     * value in {@code data} becomes a mass point whose probability is the number of
+     * times it occurs divided by {@code data.length}.
+     *
+     * @param rng random number generator used for sampling
+     * @param data input dataset
+     */
+    public EnumeratedRealDistribution(final RandomGenerator rng, final double[] data) {
+        super(rng);
+        // Tally the occurrences of each distinct value.  Keys are boxed Doubles, so values
+        // are grouped by Double.equals (0.0 and -0.0 are counted as different values).
+        final Map<Double, Integer> dataMap = new HashMap<Double, Integer>();
+        for (double value : data) {
+            final Integer count = dataMap.get(value);
+            // Autoboxing uses Integer.valueOf rather than allocating via new Integer(int).
+            dataMap.put(value, count == null ? 1 : count + 1);
+        }
+
+        // Convert the counts to parallel arrays of values and relative frequencies.
+        final int massPoints = dataMap.size();
+        final double denom = data.length; // double so the division below is not truncated
+        final double[] values = new double[massPoints];
+        final double[] probabilities = new double[massPoints];
+        int index = 0;
+        for (Entry<Double, Integer> entry : dataMap.entrySet()) {
+            values[index] = entry.getKey();
+            probabilities[index] = entry.getValue().intValue() / denom;
+            index++;
+        }
+        innerDistribution = new EnumeratedDistribution<Double>(rng, createDistribution(values, probabilities));
+    }
+
+    /**
+     * Create a discrete real-valued distribution from the input data.  Values are assigned
+     * mass based on their frequency.  For example, [0,1,1,2] as input creates a distribution
+     * with values 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively.
+     * A new {@link Well19937c} instance is created as the random generator used for sampling.
+     * @param data input dataset
+     */
+    public EnumeratedRealDistribution(final double[] data) {
+        this(new Well19937c(), data);
+    }
+    /**
+     * Create the list of Pairs representing the distribution from singletons and probabilities.
+     * Pairs follow array order; a length mismatch throws {@link DimensionMismatchException}.
+     * @param singletons values
+     * @param probabilities probabilities
+     * @return list of value/probability pairs
+     */
+    private  List<Pair<Double, Double>>  createDistribution(double[] singletons, double[] probabilities) {
         if (singletons.length != probabilities.length) {
             throw new DimensionMismatchException(probabilities.length, singletons.length);
         }
 
-        List<Pair<Double, Double>> samples = new ArrayList<Pair<Double, Double>>(singletons.length);
+        final List<Pair<Double, Double>> samples = new ArrayList<Pair<Double, Double>>(singletons.length);
 
         for (int i = 0; i < singletons.length; i++) {
             samples.add(new Pair<Double, Double>(singletons[i], probabilities[i]));
         }
+        return samples;
 
-        innerDistribution = new EnumeratedDistribution<Double>(rng, samples);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
index 48be05e..cd7bb4d 100644
--- a/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/EnumeratedIntegerDistributionTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.commons.math4.distribution;
 
+import static org.junit.Assert.assertEquals;
+
 import org.apache.commons.math4.distribution.EnumeratedIntegerDistribution;
 import org.apache.commons.math4.exception.DimensionMismatchException;
 import org.apache.commons.math4.exception.MathArithmeticException;
@@ -169,4 +171,12 @@ public class EnumeratedIntegerDistributionTest {
         Assert.assertEquals(testDistribution.getNumericalVariance(),
                 sumOfSquares / n - FastMath.pow(sum / n, 2), 1e-2);
     }
+    
+    @Test
+    public void testCreateFromIntegers() { // builds the distribution directly from raw samples
+        final int[] data = new int[] {0, 1, 1, 2, 2, 2}; // 0 once, 1 twice, 2 three times
+        EnumeratedIntegerDistribution distribution = new EnumeratedIntegerDistribution(data);
+        assertEquals(0.5, distribution.probability(2), 0); // 2 occurs 3 times out of 6
+        assertEquals(0.5, distribution.cumulativeProbability(1), 0); // 0 and 1 together: 3 out of 6
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-math/blob/5a1b473d/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
index 9a02045..6b98c2d 100644
--- a/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/EnumeratedRealDistributionTest.java
@@ -240,4 +240,12 @@ public class EnumeratedRealDistributionTest {
         assertEquals(18.0, distribution.inverseCumulativeProbability(0.5625), 0.0);
         assertEquals(28.0, distribution.inverseCumulativeProbability(0.7500), 0.0);
     }
+    
+    @Test
+    public void testCreateFromDoubles() { // builds the distribution directly from raw samples
+        final double[] data = new double[] {0, 1, 1, 2, 2, 2}; // 0 once, 1 twice, 2 three times
+        EnumeratedRealDistribution distribution = new EnumeratedRealDistribution(data);
+        assertEquals(0.5, distribution.probability(2), 0); // 2 occurs 3 times out of 6
+        assertEquals(0.5, distribution.cumulativeProbability(1), 0); // 0 and 1 together: 3 out of 6
+    }
 }