You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by er...@apache.org on 2018/09/22 17:50:36 UTC

[4/8] commons-rng git commit: RNG-51: Added JMH performance test

RNG-51: Added JMH performance test

Project: http://git-wip-us.apache.org/repos/asf/commons-rng/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-rng/commit/120d275e
Tree: http://git-wip-us.apache.org/repos/asf/commons-rng/tree/120d275e
Diff: http://git-wip-us.apache.org/repos/asf/commons-rng/diff/120d275e

Branch: refs/heads/master
Commit: 120d275ed34b4d5b9a0729e788382ffa39d050ce
Parents: b772328
Author: aherbert <a....@sussex.ac.uk>
Authored: Fri Sep 21 14:52:16 2018 +0100
Committer: aherbert <a....@sussex.ac.uk>
Committed: Fri Sep 21 14:52:16 2018 +0100

----------------------------------------------------------------------
 .../PoissonSamplerCachePerformance.java         | 354 +++++++++++++++++++
 .../distribution/LargeMeanPoissonSampler.java   |  12 +-
 .../distribution/PoissonSamplerCache.java       |  25 +-
 3 files changed, 379 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
----------------------------------------------------------------------
diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
new file mode 100644
index 0000000..273809b
--- /dev/null
+++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
@@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.distribution;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.rng.RandomProviderState;
+import org.apache.commons.rng.RestorableUniformRandomProvider;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.PermutationSampler;
+import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
+import org.apache.commons.rng.sampling.distribution.PoissonSampler;
+import org.apache.commons.rng.sampling.distribution.PoissonSamplerCache;
+import org.apache.commons.rng.simple.RandomSource;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Executes benchmark to compare the speed of generation of Poisson random numbers when using a
+ * cache.
+ *
+ * <p>The benchmark is designed for a worse case scenario of Poisson means that are uniformly spread
+ * over a range and non-integer. A single sample is required per mean, E.g.
+ *
+ * <pre>
+ * int min = 40;
+ * int max = 1000;
+ * int range = max - min;
+ * UniformRandomProvider rng = ...;
+ *
+ * // Compare ...
+ * for (int i = 0; i < 1000; i++) {
+ *   new PoissonSampler(rng, min + rng.nextDouble() * range).sample();
+ * }
+ *
+ * // To ...
+ * PoissonSamplerCache cache = new PoissonSamplerCache(min, max);
+ * for (int i = 0; i < 1000; i++) {
+ *   PoissonSamplerCache.createPoissonSampler(rng, min + rng.nextDouble() * range).sample();
+ * }
+ * </pre>
+ *
+ * <p>The alternative scenario where the means are integer is not considered as this could be easily
+ * handled by creating an array to hold the PoissonSamplers for each mean. This does not require any
+ * specialised caching of state and is simple enough to perform for single threaded applications:
+ *
+ * <pre>
+ * public class SimpleUnsafePoissonSamplerCache {
+ *   int min = 50;
+ *   int max = 100;
+ *   PoissonSampler[] samplers = new PoissonSampler[max - min + 1];
+ *
+ *   public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) {
+ *     if (mean < min || mean > max) {
+ *       return new PoissonSampler(rng, mean);
+ *     }
+ *     int index = mean - min;
+ *     PoissonSampler sample = samplers[index];
+ *     if (sampler == null) {
+ *       sampler = new PoissonSampler(rng, mean);
+ *       samplers[index] = sampler;
+ *     }
+ *     return sampler;
+ *   }
+ * }
+ * </pre>
+ *
+ * Note that in this example the UniformRandomProvider is also cached and so this is only
+ * applicable to a single threaded application.
+ *
+ * <p>Re-written to use the PoissonSamplerCache would provide a new PoissonSampler per call in a
+ * thread-safe manner:
+ *
+ * <pre>
+ * public class SimplePoissonSamplerCache {
+ *   int min = 50;
+ *   int max = 100;
+ *   PoissonSamplerCache samplers = new PoissonSamplerCache(min, max);
+ *
+ *   public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) {
+ *       return samplers.createPoissonSampler(rng, mean);
+ *   }
+ * }
+ * </pre>
+*/
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = { "-server", "-Xms128M", "-Xmx128M" })
+public class PoissonSamplerCachePerformance {
+    /** Number of samples per run. */
+    private static final int NUM_SAMPLES = 100000;
+    /**
+     * Number of range samples.
+     *
+     * <p>Note: The LargeMeanPoissonSampler will not use a SmallMeanPoissonSampler
+     * if the mean is an integer. This will occur if the [range sample] * range is
+     * an integer.
+     *
+     * <p>If the SmallMeanPoissonSampler is not used then the cache has more
+     * advantage over the uncached version as relatively more time is spent in
+     * initialising the algorithm.
+     *
+     * <p>To avoid this use a prime number above the maximum range
+     * (currently 4096). Any number (n/RANGE_SAMPLES) * range will not be integer
+     * with n<RANGE_SAMPLES and range<RANGE_SAMPLES (unless n==0).
+     */
+    private static final int RANGE_SAMPLE_SIZE = 4099;
+    /** The size of the seed. */
+    private static final int SEED_SIZE = 128;
+
+    /**
+     * Seed used to ensure the tests are the same. This can be different per
+     * benchmark, but should be the same within the benchmark.
+     */
+    private static final int[] SEED;
+
+    /**
+     * The range sample. Should contain doubles in the range 0 inclusive to 1 exclusive.
+     *
+     * <p>The range sample is used to create a mean using:
+     * rangeMin + sample * (rangeMax - rangeMin).
+     *
+     * <p>Ideally this should be large enough to fully sample the
+     * range when expressed as discrete integers, i.e. no sparseness, and random.
+     */
+    private static final double[] RANGE_SAMPLE;
+
+    static {
+        // Build a random seed for all the tests
+        SEED = new int[SEED_SIZE];
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.MWC_256);
+        for (int i = 0; i < SEED.length; i++) {
+            SEED[i] = rng.nextInt();
+        }
+
+        final int size = RANGE_SAMPLE_SIZE;
+        final int[] sample = PermutationSampler.natural(size);
+        PermutationSampler.shuffle(rng, sample);
+
+        RANGE_SAMPLE = new double[size];
+        for (int i = 0; i < size; i++) {
+            // Note: This will have one occurrence of zero in the range.
+            // This will create at least one LargeMeanPoissonSampler that will
+            // not use a SmallMeanPoissonSampler. The different performance of this
+            // will be lost among the other samples.
+            RANGE_SAMPLE[i] = (double) sample[i] / size;
+        }
+    }
+
+    /**
+     * The benchmark state (retrieve the various "RandomSource"s).
+     */
+    @State(Scope.Benchmark)
+    public static class Sources {
+        /**
+         * RNG providers.
+         *
+         * <p>Use different speeds.
+         *
+         * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
+         *      Commons RNG user guide</a>
+         */
+        @Param({ "SPLIT_MIX_64",
+            // Comment in for slower generators
+            //"MWC_256", "KISS", "WELL_1024_A", "WELL_44497_B"
+            })
+        private String randomSourceName;
+
+        /** RNG. */
+        private RestorableUniformRandomProvider generator;
+
+        /**
+         * The state of the generator at the start of the test (for reproducible
+         * results).
+         */
+        private RandomProviderState state;
+
+        /**
+         * @return the RNG.
+         */
+        public UniformRandomProvider getGenerator() {
+            generator.restoreState(state);
+            return generator;
+        }
+
+        /** Instantiates generator. */
+        @Setup
+        public void setup() {
+            final RandomSource randomSource = RandomSource
+                    .valueOf(randomSourceName);
+            // Use the same seed
+            generator = RandomSource.create(randomSource, SEED.clone());
+            state = generator.saveState();
+        }
+    }
+
+    /**
+     * The range of mean values for testing the cache.
+     */
+    @State(Scope.Benchmark)
+    public static class MeanRange {
+        /**
+         * Test range.
+         *
+         * <p>The covers the best case scenario of caching everything (range=1) and upwards
+         * in powers of 4.
+         */
+        @Param({ "1", "4", "16", "64", "256", "1024", "4096"})
+        private double range;
+
+        /**
+         * Gets the mean.
+         *
+         * @param i the index
+         * @return the mean
+         */
+        public double getMean(int i) {
+            return getMin() + RANGE_SAMPLE[i % RANGE_SAMPLE.length] * range;
+        }
+
+        /**
+         * Gets the min of the range.
+         *
+         * @return the min
+         */
+        public double getMin() {
+            return PoissonSamplerCache.getMinimumCachedMean();
+        }
+
+        /**
+         * Gets the max of the range.
+         *
+         * @return the max
+         */
+        public double getMax() {
+            return getMin() + range;
+        }
+    }
+
+    /**
+     * A factory for creating Poisson sampler objects.
+     */
+    @FunctionalInterface
+    private interface PoissonSamplerFactory {
+        /**
+         * Creates a new Poisson sampler object.
+         *
+         * @param mean the mean
+         * @return The sampler
+         */
+        DiscreteSampler createPoissonSampler(double mean);
+    }
+
+    /**
+     * Exercises a poisson sampler created for a single use with a range of means.
+     *
+     * @param factory The factory.
+     * @param range   The range of means.
+     * @param bh      Data sink.
+     */
+    private static void runSample(PoissonSamplerFactory factory,
+            MeanRange range, Blackhole bh) {
+        for (int i = 0; i < NUM_SAMPLES; i++) {
+            bh.consume(factory.createPoissonSampler(range.getMean(i)).sample());
+        }
+    }
+
+    // Benchmarks methods below.
+
+    /**
+     * @param sources Source of randomness.
+     * @param range   The range.
+     * @param bh      Data sink.
+     */
+    @Benchmark
+    public void runPoissonSampler(Sources sources, MeanRange range,
+            Blackhole bh) {
+        final UniformRandomProvider r = sources.getGenerator();
+        final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+            @Override
+            public DiscreteSampler createPoissonSampler(double mean) {
+                return new PoissonSampler(r, mean);
+            }
+        };
+        runSample(factory, range, bh);
+    }
+
+    /**
+     * @param sources Source of randomness.
+     * @param range   The range.
+     * @param bh      Data sink.
+     */
+    @Benchmark
+    public void runPoissonSamplerCacheWhenEmpty(Sources sources, MeanRange range,
+            Blackhole bh) {
+        final UniformRandomProvider r = sources.getGenerator();
+        final PoissonSamplerCache cache = new PoissonSamplerCache(0, 0);
+        final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+            @Override
+            public DiscreteSampler createPoissonSampler(double mean) {
+                return cache.createPoissonSampler(r, mean);
+            }
+        };
+        runSample(factory, range, bh);
+    }
+
+    /**
+     * @param sources Source of randomness.
+     * @param range   The range.
+     * @param bh      Data sink.
+     */
+    @Benchmark
+    public void runPoissonSamplerCache(Sources sources, MeanRange range,
+            Blackhole bh) {
+        final UniformRandomProvider r = sources.getGenerator();
+        final PoissonSamplerCache cache = new PoissonSamplerCache(
+                range.getMin(), range.getMax());
+        final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+            @Override
+            public DiscreteSampler createPoissonSampler(double mean) {
+                return cache.createPoissonSampler(r, mean);
+            }
+        };
+        runSample(factory, range, bh);
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
----------------------------------------------------------------------
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
index d71ae63..f07eb94 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
@@ -41,6 +41,9 @@ public class LargeMeanPoissonSampler
     /** Class to compute {@code log(n!)}. This has no cached values. */
     private static final InternalUtils.FactorialLog NO_CACHE_FACTORIAL_LOG;
 
+    /** Used when there is no requirement for a small mean Poisson sampler. */
+    private static final DiscreteSampler NO_SMALL_MEAN_POISSON_SAMPLER = null;
+
     static {
         // Create without a cache.
         NO_CACHE_FACTORIAL_LOG = FactorialLog.create();
@@ -57,8 +60,6 @@ public class LargeMeanPoissonSampler
 
     /** Algorithm constant: {@code Math.floor(mean)}. */
     private final double lambda;
-    /** Algorithm constant: {@code mean - lambda}. */
-    private final double lambdaFractional;
     /** Algorithm constant: {@code Math.log(lambda)}. */
     private final double logLambda;
     /** Algorithm constant: {@code factorialLog((int) lambda)}. */
@@ -115,7 +116,6 @@ public class LargeMeanPoissonSampler
 
         // Cache values used in the algorithm
         lambda = Math.floor(mean);
-        lambdaFractional = mean - lambda;
         logLambda = Math.log(lambda);
         logLambdaFactorial = factorialLog((int) lambda);
         delta = Math.sqrt(lambda * Math.log(32 * lambda / Math.PI + 1));
@@ -129,8 +129,9 @@ public class LargeMeanPoissonSampler
         p2 = a2 / aSum;
 
         // The algorithm requires a Poisson sample from the remaining lambda fraction.
+        final double lambdaFractional = mean - lambda;
         smallMeanPoissonSampler = (lambdaFractional < Double.MIN_VALUE) ?
-            null : // Not used.
+            NO_SMALL_MEAN_POISSON_SAMPLER : // Not used.
             new SmallMeanPoissonSampler(rng, lambdaFractional);
     }
 
@@ -160,7 +161,6 @@ public class LargeMeanPoissonSampler
 
         // Use the state to initialise the algorithm
         lambda = state.getLambdaRaw();
-        this.lambdaFractional = lambdaFractional;
         logLambda = state.getLogLambda();
         logLambdaFactorial = state.getLogLambdaFactorial();
         delta = state.getDelta();
@@ -172,7 +172,7 @@ public class LargeMeanPoissonSampler
 
         // The algorithm requires a Poisson sample from the remaining lambda fraction.
         smallMeanPoissonSampler = (lambdaFractional < Double.MIN_VALUE) ?
-            null : // Not used.
+            NO_SMALL_MEAN_POISSON_SAMPLER : // Not used.
             new SmallMeanPoissonSampler(rng, lambdaFractional);
     }
 

http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
----------------------------------------------------------------------
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
index 4b74084..8e36555 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
@@ -151,16 +151,17 @@ public class PoissonSamplerCache {
         if (mean < PoissonSampler.PIVOT) {
             return new SmallMeanPoissonSampler(rng, mean);
         }
-        // The algorithm is not valid if Math.floor(mean) is not an integer.
-        if (mean > Integer.MAX_VALUE) {
-            throw new IllegalArgumentException(mean + " > " + Integer.MAX_VALUE);
+        if (mean > maxN) {
+            // Outside the range of the cache.
+            // This avoids extra parameter checks and handles the case when
+            // the cache is empty or if Math.floor(mean) is not an integer.
+            return new LargeMeanPoissonSampler(rng, mean);
         }
 
         // Convert the mean into an integer.
         final int n = (int) Math.floor(mean);
-        // Check maxN first as the cache is likely to be used from min=0
-        if (n > maxN || n < minN) {
-            // Outside the range of the cache.
+        if (n < minN) {
+            // Outside the lower range of the cache.
             return new LargeMeanPoissonSampler(rng, mean);
         }
 
@@ -280,6 +281,18 @@ public class PoissonSamplerCache {
     }
 
     /**
+     * Gets the minimum mean value that can be cached.
+     *
+     * <p>Any {@link PoissonSampler} created with a mean below this level will not
+     * have any state that can be cached.
+     *
+     * @return the minimum cached mean
+     */
+    public static double getMinimumCachedMean() {
+        return PoissonSampler.PIVOT;
+    }
+
+    /**
      * Create a new {@link PoissonSamplerCache} with the given range
      * reusing the current cache values.
      *