You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2019/10/11 13:28:21 UTC

[commons-rng] 09/16: Added systematic failures report to stress test application.

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git

commit 23f4f7c235de32c7bee9c7b113df60677a488488
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Oct 8 13:16:20 2019 +0100

    Added systematic failures report to stress test application.
---
 .../examples/stress/AlphaNumericComparator.java    | 224 +++++++++++++++++++++
 .../rng/examples/stress/ResultsCommand.java        | 178 +++++++++++++---
 2 files changed, 375 insertions(+), 27 deletions(-)

diff --git a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java
new file mode 100644
index 0000000..c920753
--- /dev/null
+++ b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/AlphaNumericComparator.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.examples.stress;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+/**
+ * Provides number sensitive sorting for character sequences.
+ *
+ * <p>Extracts sub-sequences of either numeric ({@code [0, 9]}) or non-numeric characters
+ * and compares them numerically or lexicographically. Leading zeros are ignored from
+ * numbers. Negative numbers are not supported.
+ *
+ * <pre>
+ * Traditional  AlphaNumeric
+ * z0200.html   z2.html
+ * z100.html    z100.html
+ * z2.html      z0200.html
+ * </pre>
+ *
+ * <p>This is based on ideas in the Alphanum algorithm by David Koelle.</p>
+ *
+ * <p>This implementation supports:</p>
+ *
+ * <ul>
+ *  <li>{@link CharSequence} comparison
+ *  <li>Direct use of input sequences for minimal memory consumption
+ *  <li>Numbers with leading zeros
+ * </ul>
+ *
+ * <p>Any null sequences are ordered before non-null sequences.</p>
+ *
+ * <p>Note: The comparator is thread-safe so can be used in a parallel sort.
+ *
+ * @see <a href="http://www.DaveKoelle.com">Alphanum Algorithm</a>
+ */
+class AlphaNumericComparator implements Comparator<CharSequence>, Serializable {
+    /**
+     * An instance.
+     */
+    public static final AlphaNumericComparator INSTANCE = new AlphaNumericComparator();
+
+    /**
+     * The serial version ID.
+     * Note: Comparators are recommended to be Serializable to allow serialization of
+     * collections constructed with a Comparator.
+     */
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public int compare(CharSequence seq1, CharSequence seq2) {
+        if (seq1 == seq2) {
+            return 0;
+        }
+        // Null is less
+        if (seq1 == null) {
+            return -1;
+        }
+        if (seq2 == null) {
+            return 1;
+        }
+
+        int pos1 = 0;
+        int pos2 = 0;
+        final int length1 = seq1.length();
+        final int length2 = seq2.length();
+
+        while (pos1 < length1 && pos2 < length2) {
+            final int end1 = nextSubSequenceEnd(seq1, pos1, length1);
+            final int end2 = nextSubSequenceEnd(seq2, pos2, length2);
+
+            // If both sub-sequences contain numeric characters, sort them numerically
+            int result = 0;
+            if (isDigit(seq1.charAt(pos1)) && isDigit(seq2.charAt(pos2))) {
+                result = compareNumerically(seq1, pos1, end1, seq2, pos2, end2);
+            } else {
+                result = compareLexicographically(seq1, pos1, end1, seq2, pos2, end2);
+            }
+
+            if (result != 0) {
+                return result;
+            }
+
+            pos1 = end1;
+            pos2 = end2;
+        }
+
+        return length1 - length2;
+    }
+
+    /**
+     * Get the end position of the next sub-sequence of either digits or non-digit
+     * characters starting from the start position.
+     *
+     * <p>The end position is exclusive such that the sub-sequence is the interval
+     * {@code [start, end)}.
+     *
+     * @param seq the character sequence
+     * @param start the start position
+     * @param length the sequence length
+     * @return the sub-sequence end position (exclusive)
+     */
+    private static int nextSubSequenceEnd(CharSequence seq, int start, int length) {
+        int pos = start;
+        // Set the sub-sequence type (digits or non-digits)
+        final boolean seqType = isDigit(seq.charAt(pos++));
+        while (pos < length && seqType == isDigit(seq.charAt(pos))) {
+            // Extend the sub-sequence
+            pos++;
+        }
+        return pos;
+    }
+
+    /**
+     * Checks if the character is a digit.
+     *
+     * @param ch the character
+     * @return true if a digit
+     */
+    private static boolean isDigit(char ch) {
+        return ch >= 48 && ch <= 57;
+    }
+
+    /**
+     * Compares two sub-sequences numerically. Ignores leading zeros. Assumes all
+     * characters are digits.
+     *
+     * @param seq1 the first sequence
+     * @param start1 the start of the first sub-sequence
+     * @param end1 the end of the first sub-sequence
+     * @param seq2 the second sequence
+     * @param start2 the start of the second sub-sequence
+     * @param end2 the end of the second sub-sequence sequence
+     * @return the value {@code 0} if the sub-sequences are equal; a value less than
+     * {@code 0} if sub-sequence 1 is numerically less than sub-sequence 2; and a value
+     * greater than {@code 0} if sub-sequence 1 is numerically greater than sub-sequence
+     * 2.
+     */
+    private static int compareNumerically(CharSequence seq1, int start1, int end1,
+                                          CharSequence seq2, int start2, int end2) {
+        // Ignore leading zeros in numbers
+        int pos1 = advancePastLeadingZeros(seq1, start1, end1);
+        int pos2 = advancePastLeadingZeros(seq2, start2, end2);
+
+        // Simple comparison by length
+        final int result = (end1 - pos1) - (end2 - pos2);
+        // If equal, the first different number counts.
+        if (result == 0) {
+            while (pos1 < end1) {
+                final char c1 = seq1.charAt(pos1++);
+                final char c2 = seq2.charAt(pos2++);
+                if (c1 != c2) {
+                    return c1 - c2;
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Advances past leading zeros in the sub-sequence. Returns the index of the start
+     * character of the number.
+     *
+     * @param seq the sequence
+     * @param start the start of the sub-sequence
+     * @param end the end of the sub-sequence
+     * @return the start index of the number
+     */
+    private static int advancePastLeadingZeros(CharSequence seq, int start, int end) {
+        int pos = start;
+        // Ignore zeros only when there are further characters
+        while (pos < end - 1 && seq.charAt(pos) == '0') {
+            pos++;
+        }
+        return pos;
+    }
+
+    /**
+     * Compares two sub-sequences lexicographically. This matches the compare function in
+     * {@link String} using extracted sub-sequences.
+     *
+     * @param seq1 the first sequence
+     * @param start1 the start of the first sub-sequence
+     * @param end1 the end of the first sub-sequence
+     * @param seq2 the second sequence
+     * @param start2 the start of the second sub-sequence
+     * @param end2 the end of the second sub-sequence sequence
+     * @return the value {@code 0} if the sub-sequences are equal; a value less than
+     * {@code 0} if sub-sequence 1 is lexicographically less than sub-sequence 2; and a
+     * value greater than {@code 0} if sub-sequence 1 is lexicographically greater than
+     * sub-sequence 2.
+     * @see String#compareTo(String)
+     */
+    private static int compareLexicographically(CharSequence seq1, int start1, int end1,
+                                                CharSequence seq2, int start2, int end2) {
+        final int len1 = end1 - start1;
+        final int len2 = end2 - start2;
+        final int limit = Math.min(len1, len2);
+
+        for (int i = 0; i < limit; i++) {
+            final char c1 = seq1.charAt(i + start1);
+            final char c2 = seq2.charAt(i + start2);
+            if (c1 != c2) {
+                return c1 - c2;
+            }
+        }
+        return len1 - len2;
+    }
+}
diff --git a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java
index 2f20252..5f3eb9e 100644
--- a/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java
+++ b/commons-rng-examples/examples-stress/src/main/java/org/apache/commons/rng/examples/stress/ResultsCommand.java
@@ -36,9 +36,11 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.Formatter;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map.Entry;
 import java.util.concurrent.Callable;
 import java.util.function.Function;
 import java.util.regex.Matcher;
@@ -84,6 +86,8 @@ class ResultsCommand implements Callable<Void> {
     private static final char BACK_SLASH = '\\';
     /** Character '|'. */
     private static final char PIPE = '|';
+    /** The column name for the RNG. */
+    private static final String COLUMN_RNG = "RNG";
 
     /** The standard options. */
     @Mixin
@@ -108,7 +112,9 @@ class ResultsCommand implements Callable<Void> {
 
     /** The flag to show failed tests. */
     @Option(names = {"--failed"},
-            description = "Output failed tests (not all formats).")
+            description = {"Output failed tests (support varies by format).",
+                           "CSV: List individual test failures.",
+                           "APT: Count of systematic test failures."})
     private boolean showFailedTests;
 
     /** The flag to include the Dieharder sums test. */
@@ -133,6 +139,8 @@ class ResultsCommand implements Callable<Void> {
         APT,
         /** Text table output. */
         TXT,
+        /** Systematic failures output. */
+        FAILURES,
     }
 
     /**
@@ -310,6 +318,9 @@ class ResultsCommand implements Callable<Void> {
             case TXT:
                 writeTXT(out, results);
                 break;
+            case FAILURES:
+                writeFailures(out, results);
+                break;
             default:
                 throw new ApplicationException("Unknown output format: " + outputFormat);
             }
@@ -709,9 +720,14 @@ class ResultsCommand implements Callable<Void> {
                     }
                     for (final String testName : testNames) {
                         final List<TestResult> testResults = getTestResults(results, randomSource, reversed, testName);
-                        writeAPTColumn(output, testResults.stream()
-                                                          .map(toAPTString)
-                                                          .collect(Collectors.joining(", ")));
+                        String text = testResults.stream()
+                                                 .map(toAPTString)
+                                                 .collect(Collectors.joining(", "));
+                        // Add systematic failures in brackets
+                        if (showFailedTests) {
+                            text += " (" + getSystematicFailures(testResults).size() + ")";
+                            writeAPTColumn(output, text);
+                        }
                     }
                     output.newLine();
                     output.write(separator);
@@ -861,16 +877,16 @@ class ResultsCommand implements Callable<Void> {
     }
 
     /**
-     * Write the column name to the output.
+     * Write the column text to the output.
      *
      * @param output Output.
-     * @param name Name.
+     * @param text Text.
      * @throws IOException Signals that an I/O exception has occurred.
      */
     private static void writeAPTColumn(BufferedWriter output,
-                                       String name) throws IOException {
+                                       String text) throws IOException {
         output.write(' ');
-        output.write(name);
+        output.write(text);
         output.write(" |");
     }
 
@@ -899,6 +915,31 @@ class ResultsCommand implements Callable<Void> {
     }
 
     /**
+     * Gets the systematic failures (tests that fail in every test result).
+     *
+     * @param results Results.
+     * @return the systematic failures
+     */
+    private static List<String> getSystematicFailures(List<TestResult> results) {
+        final HashMap<String, Integer> map = new HashMap<>();
+        for (final TestResult result : results) {
+            // Some named tests can fail more than once on different statistics.
+            // For example TestU01 BigCrush LongestHeadRun can output in the summary:
+            // 86  LongestHeadRun, r = 0            eps
+            // 86  LongestHeadRun, r = 0          1 - eps1
+            // This will be counted as 2 failed tests. For the purpose of systematic
+            // failures the name of the test is the same and should be counted once.
+            final HashSet<String> unique = new HashSet<>(result.getFailedTests());
+            for (String test : unique) {
+                map.merge(test, 1, (i, j) -> i + j);
+            }
+        }
+        return map.entrySet().stream().filter(e -> e.getValue() == results.size())
+                                      .map(Entry::getKey)
+                                      .collect(Collectors.toList());
+    }
+
+    /**
      * Write the results as a text table.
      *
      * @param out Output stream.
@@ -917,7 +958,7 @@ class ResultsCommand implements Callable<Void> {
         // Make bit-reversed column optional if no generators are bit reversed.
         final boolean showBitReversedColumn = bitReversed.contains(Boolean.TRUE);
 
-        final List<List<String>> columns = createColumns(testNames, showBitReversedColumn);
+        final List<List<String>> columns = createTXTColumns(testNames, showBitReversedColumn);
 
         // Add all data
         // This will collate results for each combination of 'RandomSource + bitReversed'
@@ -934,43 +975,31 @@ class ResultsCommand implements Callable<Void> {
                     columns.get(i++).add(testResults.stream()
                                                     .map(TestResult::getFailureCountString)
                                                     .collect(Collectors.joining(",")));
+                    columns.get(i++).add(String.valueOf(getSystematicFailures(testResults).size()));
                 }
             }
         }
 
-        // Create format using the column widths
-        final String format = createTextFormatFromColumnWidths(columns);
-
-        // Output
-        try (BufferedWriter output = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
-             Formatter formatter = new Formatter(output)) {
-            final int rows = columns.get(0).size();
-            final Object[] args = new Object[columns.size()];
-            for (int row = 0; row < rows; row++) {
-                for (int i = 0; i < args.length; i++) {
-                    args[i] = columns.get(i).get(row);
-                }
-                formatter.format(format, args);
-            }
-        }
+        writeColumns(out, columns);
     }
 
     /**
-     * Creates the columns.
+     * Creates the columns for the text output.
      *
      * @param testNames the test names
      * @param showBitReversedColumn Set to true to show the bit reversed column
      * @return the list of columns
      */
-    private static List<List<String>> createColumns(final List<String> testNames,
+    private static List<List<String>> createTXTColumns(final List<String> testNames,
         final boolean showBitReversedColumn) {
         final ArrayList<List<String>> columns = new ArrayList<>();
-        columns.add(createColumn("RNG"));
+        columns.add(createColumn(COLUMN_RNG));
         if (showBitReversedColumn) {
             columns.add(createColumn(BIT_REVERSED));
         }
         for (final String testName : testNames) {
             columns.add(createColumn(testName));
+            columns.add(createColumn("∩"));
         }
         return columns;
     }
@@ -1021,4 +1050,99 @@ class ResultsCommand implements Callable<Void> {
         }
         return width;
     }
+
+    /**
+     * Write the columns as fixed width text to the output.
+     *
+     * @param out Output stream.
+     * @param columns Columns
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    private static void writeColumns(OutputStream out,
+                                     List<List<String>> columns) throws IOException {
+        // Create format using the column widths
+        final String format = createTextFormatFromColumnWidths(columns);
+
+        // Output
+        try (BufferedWriter output = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
+             Formatter formatter = new Formatter(output)) {
+            final int rows = columns.get(0).size();
+            final Object[] args = new Object[columns.size()];
+            for (int row = 0; row < rows; row++) {
+                for (int i = 0; i < args.length; i++) {
+                    args[i] = columns.get(i).get(row);
+                }
+                formatter.format(format, args);
+            }
+        }
+    }
+
+    /**
+     * Write the systematic failures as a text table.
+     *
+     * @param out Output stream.
+     * @param results Results.
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    private static void writeFailures(OutputStream out,
+                                      List<TestResult> results) throws IOException {
+        // Identify all:
+        // RandomSources, bit-reversed, test names,
+        final List<RandomSource> randomSources = getRandomSources(results);
+        final List<Boolean> bitReversed = getBitReversed(results);
+        final List<String> testNames = getTestNames(results);
+
+        // Create columns for RandomSource, bit-reversed, each test name.
+        // Make bit-reversed column optional if no generators are bit reversed.
+        final boolean showBitReversedColumn = bitReversed.contains(Boolean.TRUE);
+
+        final List<List<String>> columns = createFailuresColumns(testNames, showBitReversedColumn);
+
+        final AlphaNumericComparator cmp = new AlphaNumericComparator();
+
+        // Add all data for each combination of 'RandomSource + bitReversed'
+        for (final RandomSource randomSource : randomSources) {
+            for (final boolean reversed : bitReversed) {
+                for (final String testName : testNames) {
+                    final List<TestResult> testResults = getTestResults(results, randomSource,
+                            reversed, testName);
+                    final List<String> failures = getSystematicFailures(testResults);
+                    if (failures.isEmpty()) {
+                        continue;
+                    }
+                    Collections.sort(failures, cmp);
+                    for (String failed : failures) {
+                        int i = 0;
+                        columns.get(i++).add(randomSource.toString());
+                        if (showBitReversedColumn) {
+                            columns.get(i++).add(Boolean.toString(reversed));
+                        }
+                        columns.get(i++).add(testName);
+                        columns.get(i).add(failed);
+                    }
+                }
+            }
+        }
+
+        writeColumns(out, columns);
+    }
+
+    /**
+     * Creates the columns for the failures output.
+     *
+     * @param testNames the test names
+     * @param showBitReversedColumn Set to true to show the bit reversed column
+     * @return the list of columns
+     */
+    private static List<List<String>> createFailuresColumns(final List<String> testNames,
+        final boolean showBitReversedColumn) {
+        final ArrayList<List<String>> columns = new ArrayList<>();
+        columns.add(createColumn(COLUMN_RNG));
+        if (showBitReversedColumn) {
+            columns.add(createColumn(BIT_REVERSED));
+        }
+        columns.add(createColumn("Test Suite"));
+        columns.add(createColumn("Test"));
+        return columns;
+    }
 }