You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/05/06 19:55:36 UTC
[commons-rng] 01/03: RNG-134: BoxSampler to sample uniformly from a
box (or hyperrectangle)
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit d344c5fcbcd7bec12c34b24d8ca0573575c064d7
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Fri Apr 30 21:23:48 2021 +0100
RNG-134: BoxSampler to sample uniformly from a box (or hyperrectangle)
---
.../commons/rng/sampling/shape/BoxSampler.java | 269 +++++++++++++++
.../commons/rng/sampling/shape/BoxSamplerTest.java | 369 +++++++++++++++++++++
src/changes/changes.xml | 3 +
3 files changed, 641 insertions(+)
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java
new file mode 100644
index 0000000..227fad6
--- /dev/null
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.sampling.shape;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.SharedStateSampler;
+
+/**
+ * Generate points uniformly distributed within a n-dimension box (hyperrectangle).
+ *
+ * <p>Sampling uses:</p>
+ *
+ * <ul>
+ * <li>{@link UniformRandomProvider#nextDouble()}
+ * </ul>
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Hyperrectangle">Hyperrectangle (Wikipedia)</a>
+ * @since 1.4
+ */
+public abstract class BoxSampler implements SharedStateSampler<BoxSampler> {
+ /** The dimension for 2D sampling. */
+ private static final int TWO_D = 2;
+ /** The dimension for 3D sampling. */
+ private static final int THREE_D = 3;
+ /** The source of randomness. */
+ private final UniformRandomProvider rng;
+
+ // The following code defines a point within the range ab:
+ // p = (1 - u)a + ub, u in [0, 1]
+ //
+ // This is the same method used in the
+ // o.a.c.rng.sampling.distribution.ContinuousUniformSampler but extended to N-dimensions.
+
+ /**
+ * Sample uniformly from a box in 2D. This is an non-array based specialisation of
+ * {@link BoxSamplerND} for performance.
+ */
+ private static class BoxSampler2D extends BoxSampler {
+ /** The x component of bound a. */
+ private final double ax;
+ /** The y component of bound a. */
+ private final double ay;
+ /** The x component of bound b. */
+ private final double bx;
+ /** The y component of bound b. */
+ private final double by;
+
+ /**
+ * @param a Bound a.
+ * @param b Bound b.
+ * @param rng Source of randomness.
+ */
+ BoxSampler2D(double[] a, double[] b, UniformRandomProvider rng) {
+ super(rng);
+ ax = a[0];
+ ay = a[1];
+ bx = b[0];
+ by = b[1];
+ }
+
+ /**
+ * @param rng Source of randomness.
+ * @param source Source to copy.
+ */
+ BoxSampler2D(UniformRandomProvider rng, BoxSampler2D source) {
+ super(rng);
+ ax = source.ax;
+ ay = source.ay;
+ bx = source.bx;
+ by = source.by;
+ }
+
+ @Override
+ public double[] sample() {
+ return new double[] {createSample(ax, bx),
+ createSample(ay, by)};
+ }
+
+ @Override
+ public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) {
+ return new BoxSampler2D(rng, this);
+ }
+ }
+
+ /**
+ * Sample uniformly from a box in 3D. This is an non-array based specialisation of
+ * {@link BoxSamplerND} for performance.
+ */
+ private static class BoxSampler3D extends BoxSampler {
+ /** The x component of bound a. */
+ private final double ax;
+ /** The y component of bound a. */
+ private final double ay;
+ /** The z component of bound a. */
+ private final double az;
+ /** The x component of bound b. */
+ private final double bx;
+ /** The y component of bound b. */
+ private final double by;
+ /** The z component of bound b. */
+ private final double bz;
+
+ /**
+ * @param a Bound a.
+ * @param b Bound b.
+ * @param rng Source of randomness.
+ */
+ BoxSampler3D(double[] a, double[] b, UniformRandomProvider rng) {
+ super(rng);
+ ax = a[0];
+ ay = a[1];
+ az = a[2];
+ bx = b[0];
+ by = b[1];
+ bz = b[2];
+ }
+
+ /**
+ * @param rng Source of randomness.
+ * @param source Source to copy.
+ */
+ BoxSampler3D(UniformRandomProvider rng, BoxSampler3D source) {
+ super(rng);
+ ax = source.ax;
+ ay = source.ay;
+ az = source.az;
+ bx = source.bx;
+ by = source.by;
+ bz = source.bz;
+ }
+
+ @Override
+ public double[] sample() {
+ return new double[] {createSample(ax, bx),
+ createSample(ay, by),
+ createSample(az, bz)};
+ }
+
+ @Override
+ public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) {
+ return new BoxSampler3D(rng, this);
+ }
+ }
+
+ /**
+ * Sample uniformly from a box in ND.
+ */
+ private static class BoxSamplerND extends BoxSampler {
+ /** Bound a. */
+ private final double[] a;
+ /** Bound b. */
+ private final double[] b;
+
+ /**
+ * @param a Bound a.
+ * @param b Bound b.
+ * @param rng Source of randomness.
+ */
+ BoxSamplerND(double[] a, double[] b, UniformRandomProvider rng) {
+ super(rng);
+ // Defensive copy
+ this.a = a.clone();
+ this.b = b.clone();
+ }
+
+ /**
+ * @param rng Source of randomness.
+ * @param source Source to copy.
+ */
+ BoxSamplerND(UniformRandomProvider rng, BoxSamplerND source) {
+ super(rng);
+ // Shared state is immutable
+ a = source.a;
+ b = source.b;
+ }
+
+ @Override
+ public double[] sample() {
+ final double[] x = new double[a.length];
+ for (int i = 0; i < x.length; i++) {
+ x[i] = createSample(a[i], b[i]);
+ }
+ return x;
+ }
+
+ @Override
+ public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) {
+ return new BoxSamplerND(rng, this);
+ }
+ }
+
+ /**
+ * @param rng Source of randomness.
+ */
+ BoxSampler(UniformRandomProvider rng) {
+ this.rng = rng;
+ }
+
+ /**
+ * @return a random Cartesian point within the box.
+ */
+ public abstract double[] sample();
+
+ /**
+ * Creates the sample between bound a and b.
+ *
+ * @param a Bound a
+ * @param b Bound b
+ * @return the sample
+ */
+ double createSample(double a, double b) {
+ final double u = rng.nextDouble();
+ return (1.0 - u) * a + u * b;
+ }
+
+ /**
+ * Create a box sampler with bounds {@code a} and {@code b}.
+ * Points are returned within the box defined by the bounds.
+ *
+ * <p>Sampling is supported in dimensions of 2 or above.
+ *
+ * <p>There is no requirement that {@code a <= b}.
+ *
+ * @param a Bound a.
+ * @param b Bound b.
+ * @param rng Source of randomness.
+ * @return the sampler
+ * @throws IllegalArgumentException If the bounds do not have the same
+ * dimension; the dimension is less than 2; or bounds have non-finite coordinates.
+ */
+ public static BoxSampler of(double[] a,
+ double[] b,
+ UniformRandomProvider rng) {
+ final int dimension = a.length;
+ if (dimension != b.length) {
+ throw new IllegalArgumentException(
+ new StringBuilder("Mismatch of box dimensions: ").append(dimension).append(',')
+ .append(b.length).toString());
+ }
+ // Detect non-finite bounds
+ Coordinates.requireFinite(a, "Bound a");
+ Coordinates.requireFinite(b, "Bound b");
+ // Low dimension specialisations
+ if (dimension == TWO_D) {
+ return new BoxSampler2D(a, b, rng);
+ } else if (dimension == THREE_D) {
+ return new BoxSampler3D(a, b, rng);
+ } else if (dimension > THREE_D) {
+ return new BoxSamplerND(a, b, rng);
+ }
+ // Less than 2D
+ throw new IllegalArgumentException("Unsupported dimension: " + dimension);
+ }
+}
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java
new file mode 100644
index 0000000..96a15e8
--- /dev/null
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java
@@ -0,0 +1,369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.shape;
+
+import java.util.Arrays;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.RandomAssert;
+import org.apache.commons.rng.sampling.UnitSphereSampler;
+import org.apache.commons.rng.simple.RandomSource;
+
+/**
+ * Test for {@link BoxSampler}.
+ */
+public class BoxSamplerTest {
+ /**
+ * Test an unsupported dimension.
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void testInvalidDimensionThrows() {
+ final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ BoxSampler.of(new double[1], new double[1], rng);
+ }
+
+ /**
+ * Test a dimension mismatch between vertices.
+ */
+ @Test
+ public void testDimensionMismatchThrows() {
+ final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ final double[] c2 = new double[2];
+ final double[] c3 = new double[3];
+ for (double[][] c : new double[][][] {
+ {c2, c3},
+ {c3, c2},
+ }) {
+ try {
+ BoxSampler.of(c[0], c[1], rng);
+ Assert.fail(String.format("Did not detect dimension mismatch: %d,%d",
+ c[0].length, c[1].length));
+ } catch (IllegalArgumentException ex) {
+ // Expected
+ }
+ }
+ }
+
+ /**
+ * Test non-finite vertices.
+ */
+ @Test
+ public void testNonFiniteVertexCoordinates() {
+ final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ // A valid box
+ final double[][] c = new double[][] {
+ {0, 1, 2}, {-1, 2, 3}
+ };
+ Assert.assertNotNull(BoxSampler.of(c[0], c[1], rng));
+ final double[] bad = {Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NaN};
+ for (int i = 0; i < c.length; i++) {
+ for (int j = 0; j < c[0].length; j++) {
+ for (final double d : bad) {
+ final double value = c[i][j];
+ c[i][j] = d;
+ try {
+ BoxSampler.of(c[0], c[1], rng);
+ Assert.fail(String.format("Did not detect non-finite coordinate: %d,%d = %s", i, j, d));
+ } catch (IllegalArgumentException ex) {
+ // Expected
+ }
+ c[i][j] = value;
+ }
+ }
+ }
+ }
+
+ /**
+ * Test a box with coordinates that are separated by more than
+ * {@link Double#MAX_VALUE} in 2D.
+ */
+ @Test
+ public void testExtremeValueCoordinates2D() {
+ testExtremeValueCoordinates(2);
+ }
+
+ /**
+ * Test a box with coordinates that are separated by more than
+ * {@link Double#MAX_VALUE} in 3D.
+ */
+ @Test
+ public void testExtremeValueCoordinates3D() {
+ testExtremeValueCoordinates(3);
+ }
+
+ /**
+ * Test a box with coordinates that are separated by more than
+ * {@link Double#MAX_VALUE} in 4D.
+ */
+ @Test
+ public void testExtremeValueCoordinates4D() {
+ testExtremeValueCoordinates(4);
+ }
+
+ /**
+ * Test a box with coordinates that are separated by more than
+ * {@link Double#MAX_VALUE}.
+ *
+ * @param dimension the dimension
+ */
+ private static void testExtremeValueCoordinates(int dimension) {
+ // Object seed so use Long not long
+ final Long seed = 456456L;
+ final double[][] c1 = new double[2][dimension];
+ final double[][] c2 = new double[2][dimension];
+ // Create a valid box that can be scaled
+ Arrays.fill(c1[0], -1);
+ Arrays.fill(c1[1], 1);
+ // Extremely large value for scaling. Use a power of 2 for exact scaling.
+ final double scale = 0x1.0p1023;
+ for (int i = 0; i < c1.length; i++) {
+ // Scale the second box
+ for (int j = 0; j < dimension; j++) {
+ c2[i][j] = c1[i][j] * scale;
+ }
+ }
+ // Show the box is too big to compute vectors between points.
+ Assert.assertEquals("Expect vector b - a to be infinite in the x dimension",
+ Double.POSITIVE_INFINITY, c2[1][0] - c2[0][0], 0.0);
+
+ final BoxSampler sampler1 = BoxSampler.of(c1[0], c1[1],
+ RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP, seed));
+ final BoxSampler sampler2 = BoxSampler.of(c2[0], c2[1],
+ RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP, seed));
+
+ for (int n = 0; n < 10; n++) {
+ final double[] a = sampler1.sample();
+ final double[] b = sampler2.sample();
+ for (int i = 0; i < a.length; i++) {
+ a[i] *= scale;
+ }
+ Assert.assertArrayEquals(a, b, 0.0);
+ }
+ }
+
+ /**
+ * Test the distribution of points in 2D.
+ */
+ @Test
+ public void testDistribution2D() {
+ testDistributionND(2);
+ }
+
+ /**
+ * Test the distribution of points in 3D.
+ */
+ @Test
+ public void testDistribution3D() {
+ testDistributionND(3);
+ }
+
+ /**
+ * Test the distribution of points in 4D.
+ */
+ @Test
+ public void testDistribution4D() {
+ testDistributionND(4);
+ }
+
+ /**
+ * Test the distribution of points in N dimensions. The output coordinates
+ * should be uniform in the box.
+ *
+ * @param dimension the dimension
+ */
+ private static void testDistributionND(int dimension) {
+ final UniformRandomProvider rng = RandomSource.create(RandomSource.JSF_64, 0xdabfab);
+
+ final UnitSphereSampler sphere = UnitSphereSampler.of(dimension, rng);
+ final double[] a = sphere.nextVector();
+ final double[] b = sphere.nextVector();
+
+ // Assign bins
+ final int bins = 10;
+ final int samplesPerBin = 20;
+
+ // To test uniformity within the box assign the position to a bin using:
+ // x - a
+ // ----- * bins
+ // b - a
+ // Pre-compute scaling
+ final double[] scale = new double[dimension];
+ // Precompute the bin offset for each increasing dimension:
+ // 1, bins, bins*bins, bins*bins*bins, ...
+ final int[] offset = new int[dimension];
+ for (int i = 0; i < dimension; i++) {
+ scale[i] = 1.0 / (b[i] - a[i]);
+ offset[i] = (int) Math.pow(bins, i);
+ }
+
+ // Expect a uniform distribution
+ final double[] expected = new double[(int) Math.pow(bins, dimension)];
+ Arrays.fill(expected, 1.0);
+
+ // Increase the loops and use a null seed (i.e. randomly generated) to verify robustness
+ final BoxSampler sampler = BoxSampler.of(a, b, rng);
+ final int samples = expected.length * samplesPerBin;
+ for (int n = 0; n < 1; n++) {
+ // Assign each coordinate to a region inside the box
+ final long[] observed = new long[expected.length];
+ for (int i = 0; i < samples; i++) {
+ final double[] x = sampler.sample();
+ Assert.assertEquals(dimension, x.length);
+ int index = 0;
+ for (int j = 0; j < dimension; j++) {
+ final double c = (x[j] - a[j]) * scale[j];
+ Assert.assertTrue("Not within the box", c >= 0.0 && c <= 1.0);
+ // Get the bin for this dimension (assumes c != 1.0)
+ final int bin = (int) (c * bins);
+ // Add to the final bin index
+ index += bin * offset[j];
+ }
+ // Assign the uniform deviate to a bin
+ observed[index]++;
+ }
+ final double p = new ChiSquareTest().chiSquareTest(expected, observed);
+ Assert.assertFalse("p-value too small: " + p, p < 0.001);
+ }
+ }
+
+ /**
+ * Test the SharedStateSampler implementation for 2D.
+ */
+ @Test
+ public void testSharedStateSampler2D() {
+ testSharedStateSampler(2);
+ }
+
+ /**
+ * Test the SharedStateSampler implementation for 3D.
+ */
+ @Test
+ public void testSharedStateSampler3D() {
+ testSharedStateSampler(3);
+ }
+
+ /**
+ * Test the SharedStateSampler implementation for 4D.
+ */
+ @Test
+ public void testSharedStateSampler4D() {
+ testSharedStateSampler(4);
+ }
+
+ /**
+ * Test the SharedStateSampler implementation for the given dimension.
+ */
+ private static void testSharedStateSampler(int dimension) {
+ final UniformRandomProvider rng1 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ final UniformRandomProvider rng2 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ final double[] c1 = createCoordinate(1, dimension);
+ final double[] c2 = createCoordinate(2, dimension);
+ final BoxSampler sampler1 = BoxSampler.of(c1, c2, rng1);
+ final BoxSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
+ RandomAssert.assertProduceSameSequence(
+ new RandomAssert.Sampler<double[]>() {
+ @Override
+ public double[] sample() {
+ return sampler1.sample();
+ }
+ },
+ new RandomAssert.Sampler<double[]>() {
+ @Override
+ public double[] sample() {
+ return sampler2.sample();
+ }
+ });
+ }
+
+ /**
+ * Test the input vectors are copied and not used by reference for 2D.
+ */
+ @Test
+ public void testChangedInputCoordinates2D() {
+ testChangedInputCoordinates(2);
+ }
+
+ /**
+ * Test the input vectors are copied and not used by reference for 3D.
+ */
+ @Test
+ public void testChangedInputCoordinates3D() {
+ testChangedInputCoordinates(3);
+ }
+
+ /**
+ * Test the input vectors are copied and not used by reference for 4D.
+ */
+ @Test
+ public void testChangedInputCoordinates4D() {
+ testChangedInputCoordinates(4);
+ }
+
+ /**
+ * Test the input vectors are copied and not used by reference for the given
+ * dimension.
+ *
+ * @param dimension the dimension
+ */
+ private static void testChangedInputCoordinates(int dimension) {
+ final UniformRandomProvider rng1 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ final UniformRandomProvider rng2 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
+ final double[] c1 = createCoordinate(1, dimension);
+ final double[] c2 = createCoordinate(2, dimension);
+ final BoxSampler sampler1 = BoxSampler.of(c1, c2, rng1);
+ // Check the input vectors are copied and not used by reference.
+ // Change them in place and create a new sampler. It should have different output
+ // translated by the offset.
+ final double offset = 10;
+ for (int i = 0; i < dimension; i++) {
+ c1[i] += offset;
+ c2[i] += offset;
+ }
+ final BoxSampler sampler2 = BoxSampler.of(c1, c2, rng2);
+ for (int n = 0; n < 3; n++) {
+ final double[] s1 = sampler1.sample();
+ final double[] s2 = sampler2.sample();
+ Assert.assertEquals(s1.length, s2.length);
+ Assert.assertFalse("First sampler has used the vertices by reference",
+ Arrays.equals(s1, s2));
+ for (int i = 0; i < dimension; i++) {
+ Assert.assertEquals(s1[i] + offset, s2[i], 1e-10);
+ }
+ }
+ }
+
+ /**
+ * Creates the coordinate of length specified by the dimension filled with
+ * the given value and the dimension index: x + i.
+ *
+ * @param x the value for index 0
+ * @param dimension the dimension
+ * @return the coordinate
+ */
+ private static double[] createCoordinate(double x, int dimension) {
+ final double[] coord = new double[dimension];
+ for (int i = 0; i < dimension; i++) {
+ coord[0] = x + i;
+ }
+ return coord;
+ }
+}
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ab5de29..d323cfb 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -75,6 +75,9 @@ re-run tests that fail, and pass the build if they succeed
within the allotted number of reruns (the test will be marked
as 'flaky' in the report).
">
+ <action dev="aherbert" type="add" issue="134">
+ New "BoxSampler" to sample uniformly from a box (or hyperrectangle).
+ </action>
<action dev="aherbert" type="add" issue="133">
New "LineSampler" to sample uniformly on a line segment.
</action>