You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by er...@apache.org on 2018/09/22 17:50:36 UTC
[4/8] commons-rng git commit: RNG-51: Added JMH performance test
RNG-51: Added JMH performance test
Project: http://git-wip-us.apache.org/repos/asf/commons-rng/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-rng/commit/120d275e
Tree: http://git-wip-us.apache.org/repos/asf/commons-rng/tree/120d275e
Diff: http://git-wip-us.apache.org/repos/asf/commons-rng/diff/120d275e
Branch: refs/heads/master
Commit: 120d275ed34b4d5b9a0729e788382ffa39d050ce
Parents: b772328
Author: aherbert <a....@sussex.ac.uk>
Authored: Fri Sep 21 14:52:16 2018 +0100
Committer: aherbert <a....@sussex.ac.uk>
Committed: Fri Sep 21 14:52:16 2018 +0100
----------------------------------------------------------------------
.../PoissonSamplerCachePerformance.java | 354 +++++++++++++++++++
.../distribution/LargeMeanPoissonSampler.java | 12 +-
.../distribution/PoissonSamplerCache.java | 25 +-
3 files changed, 379 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
----------------------------------------------------------------------
diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
new file mode 100644
index 0000000..273809b
--- /dev/null
+++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/PoissonSamplerCachePerformance.java
@@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.distribution;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.rng.RandomProviderState;
+import org.apache.commons.rng.RestorableUniformRandomProvider;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.PermutationSampler;
+import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
+import org.apache.commons.rng.sampling.distribution.PoissonSampler;
+import org.apache.commons.rng.sampling.distribution.PoissonSamplerCache;
+import org.apache.commons.rng.simple.RandomSource;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Executes benchmark to compare the speed of generation of Poisson random numbers when using a
+ * cache.
+ *
+ * <p>The benchmark is designed for a worse case scenario of Poisson means that are uniformly spread
+ * over a range and non-integer. A single sample is required per mean, E.g.
+ *
+ * <pre>
+ * int min = 40;
+ * int max = 1000;
+ * int range = max - min;
+ * UniformRandomProvider rng = ...;
+ *
+ * // Compare ...
+ * for (int i = 0; i < 1000; i++) {
+ * new PoissonSampler(rng, min + rng.nextDouble() * range).sample();
+ * }
+ *
+ * // To ...
+ * PoissonSamplerCache cache = new PoissonSamplerCache(min, max);
+ * for (int i = 0; i < 1000; i++) {
+ * PoissonSamplerCache.createPoissonSampler(rng, min + rng.nextDouble() * range).sample();
+ * }
+ * </pre>
+ *
+ * <p>The alternative scenario where the means are integer is not considered as this could be easily
+ * handled by creating an array to hold the PoissonSamplers for each mean. This does not require any
+ * specialised caching of state and is simple enough to perform for single threaded applications:
+ *
+ * <pre>
+ * public class SimpleUnsafePoissonSamplerCache {
+ * int min = 50;
+ * int max = 100;
+ * PoissonSampler[] samplers = new PoissonSampler[max - min + 1];
+ *
+ * public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) {
+ * if (mean < min || mean > max) {
+ * return new PoissonSampler(rng, mean);
+ * }
+ * int index = mean - min;
+ * PoissonSampler sample = samplers[index];
+ * if (sampler == null) {
+ * sampler = new PoissonSampler(rng, mean);
+ * samplers[index] = sampler;
+ * }
+ * return sampler;
+ * }
+ * }
+ * </pre>
+ *
+ * Note that in this example the UniformRandomProvider is also cached and so this is only
+ * applicable to a single threaded application.
+ *
+ * <p>Re-written to use the PoissonSamplerCache would provide a new PoissonSampler per call in a
+ * thread-safe manner:
+ *
+ * <pre>
+ * public class SimplePoissonSamplerCache {
+ * int min = 50;
+ * int max = 100;
+ * PoissonSamplerCache samplers = new PoissonSamplerCache(min, max);
+ *
+ * public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) {
+ * return samplers.createPoissonSampler(rng, mean);
+ * }
+ * }
+ * </pre>
+*/
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = { "-server", "-Xms128M", "-Xmx128M" })
+public class PoissonSamplerCachePerformance {
+ /** Number of samples per run. */
+ private static final int NUM_SAMPLES = 100000;
+ /**
+ * Number of range samples.
+ *
+ * <p>Note: The LargeMeanPoissonSampler will not use a SmallMeanPoissonSampler
+ * if the mean is an integer. This will occur if the [range sample] * range is
+ * an integer.
+ *
+ * <p>If the SmallMeanPoissonSampler is not used then the cache has more
+ * advantage over the uncached version as relatively more time is spent in
+ * initialising the algorithm.
+ *
+ * <p>To avoid this use a prime number above the maximum range
+ * (currently 4096). Any number (n/RANGE_SAMPLES) * range will not be integer
+ * with n<RANGE_SAMPLES and range<RANGE_SAMPLES (unless n==0).
+ */
+ private static final int RANGE_SAMPLE_SIZE = 4099;
+ /** The size of the seed. */
+ private static final int SEED_SIZE = 128;
+
+ /**
+ * Seed used to ensure the tests are the same. This can be different per
+ * benchmark, but should be the same within the benchmark.
+ */
+ private static final int[] SEED;
+
+ /**
+ * The range sample. Should contain doubles in the range 0 inclusive to 1 exclusive.
+ *
+ * <p>The range sample is used to create a mean using:
+ * rangeMin + sample * (rangeMax - rangeMin).
+ *
+ * <p>Ideally this should be large enough to fully sample the
+ * range when expressed as discrete integers, i.e. no sparseness, and random.
+ */
+ private static final double[] RANGE_SAMPLE;
+
+ static {
+ // Build a random seed for all the tests
+ SEED = new int[SEED_SIZE];
+ final UniformRandomProvider rng = RandomSource.create(RandomSource.MWC_256);
+ for (int i = 0; i < SEED.length; i++) {
+ SEED[i] = rng.nextInt();
+ }
+
+ final int size = RANGE_SAMPLE_SIZE;
+ final int[] sample = PermutationSampler.natural(size);
+ PermutationSampler.shuffle(rng, sample);
+
+ RANGE_SAMPLE = new double[size];
+ for (int i = 0; i < size; i++) {
+ // Note: This will have one occurrence of zero in the range.
+ // This will create at least one LargeMeanPoissonSampler that will
+ // not use a SmallMeanPoissonSampler. The different performance of this
+ // will be lost among the other samples.
+ RANGE_SAMPLE[i] = (double) sample[i] / size;
+ }
+ }
+
+ /**
+ * The benchmark state (retrieve the various "RandomSource"s).
+ */
+ @State(Scope.Benchmark)
+ public static class Sources {
+ /**
+ * RNG providers.
+ *
+ * <p>Use different speeds.
+ *
+ * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
+ * Commons RNG user guide</a>
+ */
+ @Param({ "SPLIT_MIX_64",
+ // Comment in for slower generators
+ //"MWC_256", "KISS", "WELL_1024_A", "WELL_44497_B"
+ })
+ private String randomSourceName;
+
+ /** RNG. */
+ private RestorableUniformRandomProvider generator;
+
+ /**
+ * The state of the generator at the start of the test (for reproducible
+ * results).
+ */
+ private RandomProviderState state;
+
+ /**
+ * @return the RNG.
+ */
+ public UniformRandomProvider getGenerator() {
+ generator.restoreState(state);
+ return generator;
+ }
+
+ /** Instantiates generator. */
+ @Setup
+ public void setup() {
+ final RandomSource randomSource = RandomSource
+ .valueOf(randomSourceName);
+ // Use the same seed
+ generator = RandomSource.create(randomSource, SEED.clone());
+ state = generator.saveState();
+ }
+ }
+
+ /**
+ * The range of mean values for testing the cache.
+ */
+ @State(Scope.Benchmark)
+ public static class MeanRange {
+ /**
+ * Test range.
+ *
+ * <p>The covers the best case scenario of caching everything (range=1) and upwards
+ * in powers of 4.
+ */
+ @Param({ "1", "4", "16", "64", "256", "1024", "4096"})
+ private double range;
+
+ /**
+ * Gets the mean.
+ *
+ * @param i the index
+ * @return the mean
+ */
+ public double getMean(int i) {
+ return getMin() + RANGE_SAMPLE[i % RANGE_SAMPLE.length] * range;
+ }
+
+ /**
+ * Gets the min of the range.
+ *
+ * @return the min
+ */
+ public double getMin() {
+ return PoissonSamplerCache.getMinimumCachedMean();
+ }
+
+ /**
+ * Gets the max of the range.
+ *
+ * @return the max
+ */
+ public double getMax() {
+ return getMin() + range;
+ }
+ }
+
+ /**
+ * A factory for creating Poisson sampler objects.
+ */
+ @FunctionalInterface
+ private interface PoissonSamplerFactory {
+ /**
+ * Creates a new Poisson sampler object.
+ *
+ * @param mean the mean
+ * @return The sampler
+ */
+ DiscreteSampler createPoissonSampler(double mean);
+ }
+
+ /**
+ * Exercises a poisson sampler created for a single use with a range of means.
+ *
+ * @param factory The factory.
+ * @param range The range of means.
+ * @param bh Data sink.
+ */
+ private static void runSample(PoissonSamplerFactory factory,
+ MeanRange range, Blackhole bh) {
+ for (int i = 0; i < NUM_SAMPLES; i++) {
+ bh.consume(factory.createPoissonSampler(range.getMean(i)).sample());
+ }
+ }
+
+ // Benchmarks methods below.
+
+ /**
+ * @param sources Source of randomness.
+ * @param range The range.
+ * @param bh Data sink.
+ */
+ @Benchmark
+ public void runPoissonSampler(Sources sources, MeanRange range,
+ Blackhole bh) {
+ final UniformRandomProvider r = sources.getGenerator();
+ final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+ @Override
+ public DiscreteSampler createPoissonSampler(double mean) {
+ return new PoissonSampler(r, mean);
+ }
+ };
+ runSample(factory, range, bh);
+ }
+
+ /**
+ * @param sources Source of randomness.
+ * @param range The range.
+ * @param bh Data sink.
+ */
+ @Benchmark
+ public void runPoissonSamplerCacheWhenEmpty(Sources sources, MeanRange range,
+ Blackhole bh) {
+ final UniformRandomProvider r = sources.getGenerator();
+ final PoissonSamplerCache cache = new PoissonSamplerCache(0, 0);
+ final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+ @Override
+ public DiscreteSampler createPoissonSampler(double mean) {
+ return cache.createPoissonSampler(r, mean);
+ }
+ };
+ runSample(factory, range, bh);
+ }
+
+ /**
+ * @param sources Source of randomness.
+ * @param range The range.
+ * @param bh Data sink.
+ */
+ @Benchmark
+ public void runPoissonSamplerCache(Sources sources, MeanRange range,
+ Blackhole bh) {
+ final UniformRandomProvider r = sources.getGenerator();
+ final PoissonSamplerCache cache = new PoissonSamplerCache(
+ range.getMin(), range.getMax());
+ final PoissonSamplerFactory factory = new PoissonSamplerFactory() {
+ @Override
+ public DiscreteSampler createPoissonSampler(double mean) {
+ return cache.createPoissonSampler(r, mean);
+ }
+ };
+ runSample(factory, range, bh);
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
----------------------------------------------------------------------
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
index d71ae63..f07eb94 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
@@ -41,6 +41,9 @@ public class LargeMeanPoissonSampler
/** Class to compute {@code log(n!)}. This has no cached values. */
private static final InternalUtils.FactorialLog NO_CACHE_FACTORIAL_LOG;
+ /** Used when there is no requirement for a small mean Poisson sampler. */
+ private static final DiscreteSampler NO_SMALL_MEAN_POISSON_SAMPLER = null;
+
static {
// Create without a cache.
NO_CACHE_FACTORIAL_LOG = FactorialLog.create();
@@ -57,8 +60,6 @@ public class LargeMeanPoissonSampler
/** Algorithm constant: {@code Math.floor(mean)}. */
private final double lambda;
- /** Algorithm constant: {@code mean - lambda}. */
- private final double lambdaFractional;
/** Algorithm constant: {@code Math.log(lambda)}. */
private final double logLambda;
/** Algorithm constant: {@code factorialLog((int) lambda)}. */
@@ -115,7 +116,6 @@ public class LargeMeanPoissonSampler
// Cache values used in the algorithm
lambda = Math.floor(mean);
- lambdaFractional = mean - lambda;
logLambda = Math.log(lambda);
logLambdaFactorial = factorialLog((int) lambda);
delta = Math.sqrt(lambda * Math.log(32 * lambda / Math.PI + 1));
@@ -129,8 +129,9 @@ public class LargeMeanPoissonSampler
p2 = a2 / aSum;
// The algorithm requires a Poisson sample from the remaining lambda fraction.
+ final double lambdaFractional = mean - lambda;
smallMeanPoissonSampler = (lambdaFractional < Double.MIN_VALUE) ?
- null : // Not used.
+ NO_SMALL_MEAN_POISSON_SAMPLER : // Not used.
new SmallMeanPoissonSampler(rng, lambdaFractional);
}
@@ -160,7 +161,6 @@ public class LargeMeanPoissonSampler
// Use the state to initialise the algorithm
lambda = state.getLambdaRaw();
- this.lambdaFractional = lambdaFractional;
logLambda = state.getLogLambda();
logLambdaFactorial = state.getLogLambdaFactorial();
delta = state.getDelta();
@@ -172,7 +172,7 @@ public class LargeMeanPoissonSampler
// The algorithm requires a Poisson sample from the remaining lambda fraction.
smallMeanPoissonSampler = (lambdaFractional < Double.MIN_VALUE) ?
- null : // Not used.
+ NO_SMALL_MEAN_POISSON_SAMPLER : // Not used.
new SmallMeanPoissonSampler(rng, lambdaFractional);
}
http://git-wip-us.apache.org/repos/asf/commons-rng/blob/120d275e/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
----------------------------------------------------------------------
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
index 4b74084..8e36555 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSamplerCache.java
@@ -151,16 +151,17 @@ public class PoissonSamplerCache {
if (mean < PoissonSampler.PIVOT) {
return new SmallMeanPoissonSampler(rng, mean);
}
- // The algorithm is not valid if Math.floor(mean) is not an integer.
- if (mean > Integer.MAX_VALUE) {
- throw new IllegalArgumentException(mean + " > " + Integer.MAX_VALUE);
+ if (mean > maxN) {
+ // Outside the range of the cache.
+ // This avoids extra parameter checks and handles the case when
+ // the cache is empty or if Math.floor(mean) is not an integer.
+ return new LargeMeanPoissonSampler(rng, mean);
}
// Convert the mean into an integer.
final int n = (int) Math.floor(mean);
- // Check maxN first as the cache is likely to be used from min=0
- if (n > maxN || n < minN) {
- // Outside the range of the cache.
+ if (n < minN) {
+ // Outside the lower range of the cache.
return new LargeMeanPoissonSampler(rng, mean);
}
@@ -280,6 +281,18 @@ public class PoissonSamplerCache {
}
/**
+ * Gets the minimum mean value that can be cached.
+ *
+ * <p>Any {@link PoissonSampler} created with a mean below this level will not
+ * have any state that can be cached.
+ *
+ * @return the minimum cached mean
+ */
+ public static double getMinimumCachedMean() {
+ return PoissonSampler.PIVOT;
+ }
+
+ /**
* Create a new {@link PoissonSamplerCache} with the given range
* reusing the current cache values.
*