You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2019/08/06 10:42:25 UTC
[commons-rng] 02/07: RNG-85: Added SeedUtils to allow generation of
hex permutations.
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 69b966f3ab135fe3f507f69de3d97bbac64fb24b
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Mon Aug 5 11:36:32 2019 +0100
RNG-85: Added SeedUtils to allow generation of hex permutations.
This is to be used to seed the Middle Square Weyl Sequence generator.
---
commons-rng-simple/pom.xml | 7 +
.../commons/rng/simple/internal/SeedUtils.java | 213 +++++++++++++++++++++
.../commons/rng/simple/internal/SeedUtilsTest.java | 104 ++++++++++
3 files changed, 324 insertions(+)
diff --git a/commons-rng-simple/pom.xml b/commons-rng-simple/pom.xml
index 84c3210..116c7fc 100644
--- a/commons-rng-simple/pom.xml
+++ b/commons-rng-simple/pom.xml
@@ -50,6 +50,13 @@
<artifactId>commons-rng-core</artifactId>
<version>1.3-SNAPSHOT</version>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>3.6.1</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</project>
diff --git a/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java b/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java
new file mode 100644
index 0000000..5d6aa14
--- /dev/null
+++ b/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.simple.internal;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.core.util.NumberFactory;
+
+/**
+ * Utility for creating seeds.
+ */
+final class SeedUtils {
+ /** The modulus {@code 256 % 9}. */
+ private static final int MOD_09 = 256 % 9;
+ /** The modulus {@code 256 % 10}. */
+ private static final int MOD_10 = 256 % 10;
+ /** The modulus {@code 256 % 11}. */
+ private static final int MOD_11 = 256 % 11;
+ /** The modulus {@code 256 % 12}. */
+ private static final int MOD_12 = 256 % 12;
+ /** The modulus {@code 256 % 13}. */
+ private static final int MOD_13 = 256 % 13;
+ /** The modulus {@code 256 % 14}. */
+ private static final int MOD_14 = 256 % 14;
+ /** The modulus {@code 256 % 15}. */
+ private static final int MOD_15 = 256 % 15;
+ /** The 16 hex digits in an array. */
+ private static final byte[] HEX_DIGIT_ARRAY = {0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0};
+
+ /**
+ * Provider of unsigned 8-bit integers.
+ */
+ private static class UnsignedByteProvider {
+ /** Mask to extract the lowest 2 bits from an integer. */
+ private static final int MASK_2_BITS = 0x3;
+ /** Mask to extract the lowest 8 bits from an integer. */
+ private static final int MASK_8_BITS = 0xff;
+
+ /** Source of randomness. */
+ private final UniformRandomProvider rng;
+ /** The current 32-bits of randomness. */
+ private int bits;
+ /** The counter tracking the bits to extract. */
+ private int counter;
+
+ /**
+ * @param rng Source of randomness.
+ */
+ UnsignedByteProvider(UniformRandomProvider rng) {
+ this.rng = rng;
+ }
+
+ /**
+ * Return the next unsigned byte.
+ *
+ * @return Value in the range[0,255]
+ */
+ int nextUnsignedByte() {
+ // Every 4 bytes generate a new 32-bit value
+ final int count = counter & MASK_2_BITS;
+ counter++;
+ if (count == 0) {
+ bits = rng.nextInt();
+ // Consume the first byte
+ return bits & MASK_8_BITS;
+ }
+ // Consume a remaining byte (occurs 3 times)
+ bits >>>= 8;
+ return bits & MASK_8_BITS;
+ }
+ }
+
+ /**
+ * Class contains only static methods.
+ */
+ private SeedUtils() {}
+
+ /**
+ * Creates an {@code int} containing a permutation of 8 hex digits chosen from 16.
+ *
+ * @param rng Source of randomness.
+ * @return hex digit permutation.
+ */
+ static int createIntHexPermutation(UniformRandomProvider rng) {
+ final UnsignedByteProvider provider = new UnsignedByteProvider(rng);
+ return createUpperBitsHexPermutation(provider);
+ }
+
+ /**
+ * Creates a {@code long} containing a permutation of 8 hex digits chosen from 16 in
+ * the upper and lower 32-bits.
+ *
+ * @param rng Source of randomness.
+ * @return hex digit permutation.
+ */
+ static long createLongHexPermutation(UniformRandomProvider rng) {
+ final UnsignedByteProvider provider = new UnsignedByteProvider(rng);
+ // Extract upper bits and combine with a second sample
+ return NumberFactory.makeLong(createUpperBitsHexPermutation(provider),
+ createUpperBitsHexPermutation(provider));
+ }
+
+ /**
+ * Creates a {@code int} containing a permutation of 8 hex digits chosen from 16.
+ *
+ * @param provider Source of randomness.
+ * @return hex digit permutation.
+ */
+ private static int createUpperBitsHexPermutation(UnsignedByteProvider provider) {
+ // Compute a Fisher-Yates shuffle in-place on the 16 hex digits.
+ // Each digit is chosen uniformly from the remaining digits.
+ // The value is swapped with the current digit.
+
+ // The following is an optimised sampling algorithm that generates
+ // a uniform deviate in the range [0,n) from an unsigned byte.
+ // The expected number of samples is 256/n.
+ // This has a bias when n does not divide into 256. So samples must
+ // be rejected at a rate of (256 % n) / 256:
+ // n 256 % n Rejection rate
+ // 9 4 0.015625
+ // 10 6 0.0234375
+ // 11 3 0.01171875
+ // 12 4 0.015625
+ // 13 9 0.03515625
+ // 14 4 0.015625
+ // 15 1 0.00390625
+ // 16 0 0
+ // The probability of no rejections is 1 - (1-p15) * (1-p14) ... = 0.115
+
+ final byte[] digits = HEX_DIGIT_ARRAY.clone();
+
+ // The first digit has no bias. Get an index using a mask to avoid modulus.
+ int bits = copyToOutput(digits, 0, 15, provider.nextUnsignedByte() & 0xf);
+
+ // All remaining digits have a bias so use the rejection algorithm to find
+ // an appropriate uniform deviate in the range [0,n)
+ bits = copyToOutput(digits, bits, 14, nextUnsignedByteInRange(provider, MOD_15, 15));
+ bits = copyToOutput(digits, bits, 13, nextUnsignedByteInRange(provider, MOD_14, 14));
+ bits = copyToOutput(digits, bits, 12, nextUnsignedByteInRange(provider, MOD_13, 13));
+ bits = copyToOutput(digits, bits, 11, nextUnsignedByteInRange(provider, MOD_12, 12));
+ bits = copyToOutput(digits, bits, 10, nextUnsignedByteInRange(provider, MOD_11, 11));
+ bits = copyToOutput(digits, bits, 9, nextUnsignedByteInRange(provider, MOD_10, 10));
+ bits = copyToOutput(digits, bits, 8, nextUnsignedByteInRange(provider, MOD_09, 9));
+
+ return bits;
+ }
+
+
+ /**
+ * Get the next unsigned byte in the range {@code [0,n)} rejecting any over-represented
+ * sample value using the pre-computed modulus.
+ *
+ * <p>This algorithm is as per Lemire (2019) adapted for 8-bit arithmetic.</p>
+ *
+ * @param provider Provider of bytes.
+ * @param threshold Modulus threshold {@code 256 % n}.
+ * @param n Upper range (exclusive)
+ * @return Value.
+ * @see <a href="https://arxiv.org/abs/1805.10941">
+ * Lemire (2019): Fast Random Integer Generation in an Interval</a>
+ */
+ private static int nextUnsignedByteInRange(UnsignedByteProvider provider, int threshold, int n) {
+ // Rejection method using multiply by a fraction:
+ // n * [0, 2^8 - 1)
+ // ------------
+ // 2^8
+ // The result is mapped back to an integer and will be in the range [0, n)
+ int m;
+ do {
+ // Compute product of n * [0, 2^8 - 1)
+ m = n * provider.nextUnsignedByte();
+
+ // Test the sample uniformity.
+ } while ((m & 0xff) < threshold);
+ // Final divide by 2^8
+ return m >>> 8;
+ }
+
+ /**
+ * Copy the lower hex digit to the output bits. Swap the upper hex digit into
+ * the lower position. This is equivalent to a swap step of a Fisher-Yates shuffle on
+ * an array but the output of the shuffle are written to the bits.
+ *
+ * @param digits Digits.
+ * @param bits Bits.
+ * @param upper Upper index.
+ * @param lower Lower index.
+ * @return Updated bits.
+ */
+ private static int copyToOutput(byte[] digits, int bits, int upper, int lower) {
+ // Move the existing bits up and append the next hex digit.
+ // This is equivalent to swapping lower to upper.
+ final int newbits = bits << 4 | digits[lower] & 0xf;
+ // Swap upper to lower.
+ digits[lower] = digits[upper];
+ return newbits;
+ }
+}
diff --git a/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java b/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java
new file mode 100644
index 0000000..412cc8c
--- /dev/null
+++ b/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.simple.internal;
+
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.core.source64.SplitMix64;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests for the {@link SeedUtils}.
+ */
+public class SeedUtilsTest {
+ /**
+ * Test the int hex permutation has 8 unique hex digits per permutation.
+ * A uniformity test is performed on to check each hex digits is used evenly at each
+ * character position.
+ */
+ @Test
+ public void testCreateIntHexPermutation() {
+ final UniformRandomProvider rng = new SplitMix64(-567435247L);
+ final long[][] samples = new long[8][16];
+ for (int i = 0; i < 1000; i++) {
+ int sample = SeedUtils.createIntHexPermutation(rng);
+ int observed = 0;
+ for (int j = 0; j < 8; j++) {
+ final int digit = sample & 0xf;
+ Assert.assertEquals("Duplicate digit in sample", 0, observed & (1 << digit));
+ observed |= 1 << digit;
+ samples[j][digit]++;
+ sample >>>= 4;
+ }
+ }
+
+ final ChiSquareTest chiSquareTest = new ChiSquareTest();
+ final double[] expected = new double[16];
+ Arrays.fill(expected, 1.0 / 16);
+ // Pass if we cannot reject null hypothesis that distributions are the same.
+ for (int j = 0; j < 8; j++) {
+ Assert.assertFalse("Not uniform in digit " + j,
+ chiSquareTest.chiSquareTest(expected, samples[j], 0.001));
+ }
+ }
+
+ /**
+ * Test the long hex permutation has 8 unique hex digits per permutation in the upper and
+ * lower 32-bits.
+ * A uniformity test is performed on to check each hex digits is used evenly at each
+ * character position.
+ */
+ @Test
+ public void testCreateLongHexPermutation() {
+ final UniformRandomProvider rng = new SplitMix64(34645768L);
+ final long[][] samples = new long[16][16];
+ for (int i = 0; i < 1000; i++) {
+ long sample = SeedUtils.createLongHexPermutation(rng);
+ // Check lower 32-bits
+ long observed = 0;
+ for (int j = 0; j < 8; j++) {
+ final int digit = (int) (sample & 0xfL);
+ Assert.assertEquals("Duplicate digit in lower sample", 0, observed & (1 << digit));
+ observed |= 1 << digit;
+ samples[j][digit]++;
+ sample >>>= 4;
+ }
+ // Check upper 32-bits
+ observed = 0;
+ for (int j = 8; j < 16; j++) {
+ final int digit = (int) (sample & 0xfL);
+ Assert.assertEquals("Duplicate digit in upper sample", 0, observed & (1 << digit));
+ observed |= 1 << digit;
+ samples[j][digit]++;
+ sample >>>= 4;
+ }
+ }
+
+ final ChiSquareTest chiSquareTest = new ChiSquareTest();
+ final double[] expected = new double[16];
+ Arrays.fill(expected, 1.0 / 16);
+ // Pass if we cannot reject null hypothesis that distributions are the same.
+ for (int j = 0; j < 16; j++) {
+ Assert.assertFalse("Not uniform in digit " + j,
+ chiSquareTest.chiSquareTest(expected, samples[j], 0.001));
+ }
+ }
+}