You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2020/12/20 18:58:55 UTC

[commons-text] branch master updated: Check for null input, use a ternary expression, refactor a constant, Javadoc.

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git


The following commit(s) were added to refs/heads/master by this push:
     new aa7e49d  Check for null input, use a ternary expression, refactor a constant, Javadoc.
aa7e49d is described below

commit aa7e49dda44d442ef9dede756c42eb48a79c0ad1
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sun Dec 20 13:58:50 2020 -0500

    Check for null input, use a ternary expression, refactor a constant,
    Javadoc.
    
    Checkstyle: names, imports.
---
 .../text/translate/NumericEntityUnescaper.java     | 30 +++----
 .../apache/commons/text/AlphabetConverterTest.java | 95 ++++++++++++----------
 .../apache/commons/text/StringEscapeUtilsTest.java |  1 -
 .../similarity/IntersectionSimilarityTest.java     |  1 -
 .../text/translate/NumericEntityUnescaperTest.java |  4 +-
 5 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
index f9050e7..b1abde5 100644
--- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
+++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
@@ -22,43 +22,49 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
 
+import org.apache.commons.lang3.ArrayUtils;
+
 /**
- * Translate XML numeric entities of the form &amp;#[xX]?\d+;? to
+ * Translates XML numeric entities of the form &amp;#[xX]?\d+;? to
  * the specific codepoint.
  *
- * Note that the semi-colon is optional.
+ * Note that the semicolon is optional.
  *
  * @since 1.0
  */
 public class NumericEntityUnescaper extends CharSequenceTranslator {
 
+    /** Default options. */
+    private static final EnumSet<OPTION> DEFAULT_OPTIONS = EnumSet
+        .copyOf(Collections.singletonList(OPTION.semiColonRequired));
+
     /** Enumerates NumericEntityUnescaper options for unescaping. */
     public enum OPTION {
 
         /**
-         * Require a semicolon.
+         * Requires a semicolon.
          */
         semiColonRequired,
 
         /**
-         * Do not require a semicolon.
+         * Does not require a semicolon.
          */
         semiColonOptional,
 
         /**
-         * Throw an exception if a semi-colon is missing.
+         * Throws an exception if a semicolon is missing.
          */
         errorIfNoSemiColon
     }
 
-    /** EnumSet of OPTIONS, given from the constructor. */
+    /** EnumSet of OPTIONS, given from the constructor, read-only. */
     private final EnumSet<OPTION> options;
 
     /**
-     * Create a UnicodeUnescaper.
+     * Creates a NumericEntityUnescaper.
      *
      * The constructor takes a list of options, only one type of which is currently
-     * available (whether to allow, error or ignore the semi-colon on the end of a
+     * available (whether to allow, error or ignore the semicolon on the end of a
      * numeric entity to being missing).
      *
      * For example, to support numeric entities without a ';':
@@ -71,15 +77,11 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
      * @param options to apply to this unescaper
      */
     public NumericEntityUnescaper(final OPTION... options) {
-        if (options.length > 0) {
-            this.options = EnumSet.copyOf(Arrays.asList(options));
-        } else {
-            this.options = EnumSet.copyOf(Collections.singletonList(OPTION.semiColonRequired));
-        }
+        this.options = ArrayUtils.isEmpty(options) ? DEFAULT_OPTIONS : EnumSet.copyOf(Arrays.asList(options));
     }
 
     /**
-     * Whether the passed in option is currently set.
+     * Tests whether the passed in option is currently set.
      *
      * @param option to check state of
      * @return whether the option is set
diff --git a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
index 664b3cf..fb10493 100644
--- a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
+++ b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang3.ArrayUtils;
 import org.junit.jupiter.api.Test;
 
 /**
@@ -33,33 +34,39 @@ import org.junit.jupiter.api.Test;
  */
 public class AlphabetConverterTest {
 
-    private static final Character[] lowerCaseEnglish = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
-            'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
-    private static final Character[] englishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c',
-            'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
-            'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
-            'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '};
-    private static final Character[] lowerCaseEnglishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
-            'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
-            'w', 'x', 'y', 'z', ' '};
-    private static final Character[] numbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
-    private static final Character[] binary = {'0', '1'};
-    private static final Character[] hebrew = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', '\u05dd',
-            '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da',
-            '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'};
-    private static final Character[] empty = {};
-
-    private static final Integer[] unicode = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, 99,
-            100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005};
-    private static final Integer[] lowerCaseEnglishCodepoints = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
-            108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122};
-    private static final Integer[] doNotEncodeCodepoints = {32, 97, 98, 99}; // space, a, b, c
+    private static final Character[] LOWER_CASE_ENGLISH = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
+        'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
+
+    private static final Character[] ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b',
+        'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+        'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+        'U', 'V', 'W', 'X', 'Y', 'Z', ' '};
+
+    private static final Character[] LOWER_CASE_ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+        'w', 'x', 'y', 'z', ' '};
+
+    private static final Character[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
+
+    private static final Character[] BINARY = {'0', '1'};
+
+    private static final Character[] HEBREW = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df',
+        '\u05dd', '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da',
+        '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'};
+
+    private static final Integer[] UNICODE = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98,
+        99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005};
+
+    private static final Integer[] LOWER_CASE_ENGLISH_CODEPOINTS = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+        107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122};
+
+    private static final Integer[] DO_NOT_ENCODE_CODEPOINTS = {32, 97, 98, 99}; // space, a, b, c
 
     @Test
     public void binaryTest() throws UnsupportedEncodingException {
-        test(binary, numbers, empty, "0", "1", "10", "11");
-        test(numbers, binary, empty, "12345", "0");
-        test(lowerCaseEnglish, binary, empty, "abc", "a");
+        test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "0", "1", "10", "11");
+        test(NUMBERS, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "12345", "0");
+        test(LOWER_CASE_ENGLISH, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "abc", "a");
     }
 
     private AlphabetConverter createJavadocExample() {
@@ -72,29 +79,29 @@ public class AlphabetConverterTest {
 
     @Test
     public void doNotEncodeTest() throws UnsupportedEncodingException {
-        test(englishAndNumbers, lowerCaseEnglishAndNumbers, lowerCaseEnglish, "1", "456", "abc", "ABC",
+        test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, "1", "456", "abc", "ABC",
                 "this will not be converted but THIS WILL");
-        test(englishAndNumbers, lowerCaseEnglishAndNumbers, numbers, "1", "456", "abc", "ABC",
+        test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, NUMBERS, "1", "456", "abc", "ABC",
                 "this will be converted but 12345 and this will be");
     }
 
     @Test
     public void encodeFailureTest() {
         assertThatThrownBy(() -> {
-            test(binary, numbers, empty, "3");
+            test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "3");
         }).isInstanceOf(UnsupportedEncodingException.class).hasMessage("Couldn't find encoding for '3' in 3");
     }
 
     @Test
     public void hebrewTest() throws UnsupportedEncodingException {
-        test(hebrew, binary, empty, "\u05d0", "\u05e2",
-                "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+        test(HEBREW, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
+            "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
                 + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
-        test(hebrew, numbers, empty, "\u05d0", "\u05e2",
-                "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+        test(HEBREW, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
+            "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
                 + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
-        test(numbers, hebrew, empty, "123456789", "1", "5");
-        test(lowerCaseEnglish, hebrew, empty, "this is a test");
+        test(NUMBERS, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "123456789", "1", "5");
+        test(LOWER_CASE_ENGLISH, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "this is a test");
     }
 
     /*
@@ -114,7 +121,7 @@ public class AlphabetConverterTest {
     @Test
     public void missingDoNotEncodeLettersFromEncodingTest() {
         assertThatThrownBy(() -> {
-            AlphabetConverter.createConverterFromChars(englishAndNumbers, lowerCaseEnglish, numbers);
+            AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, NUMBERS);
         }).isInstanceOf(IllegalArgumentException.class).hasMessage(
                 "Can not use 'do not encode' list because encoding alphabet does not contain '0'");
     }
@@ -122,7 +129,7 @@ public class AlphabetConverterTest {
     @Test
     public void missingDoNotEncodeLettersFromOriginalTest() {
         assertThatThrownBy(() -> {
-            AlphabetConverter.createConverterFromChars(lowerCaseEnglish, englishAndNumbers, numbers);
+            AlphabetConverter.createConverterFromChars(LOWER_CASE_ENGLISH, ENGLISH_AND_NUMBERS, NUMBERS);
         }).isInstanceOf(IllegalArgumentException.class).hasMessage(
                 "Can not use 'do not encode' list because original alphabet does not contain '0'");
     }
@@ -130,20 +137,20 @@ public class AlphabetConverterTest {
     @Test
     public void noEncodingLettersTest() {
         assertThatThrownBy(() -> {
-            AlphabetConverter.createConverterFromChars(englishAndNumbers, numbers, numbers);
+            AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, NUMBERS, NUMBERS);
         }).isInstanceOf(IllegalArgumentException.class).hasMessage(
-                "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0");
+            "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0");
     }
 
     @Test
     public void onlyOneEncodingLettersTest() {
         assertThatThrownBy(() -> {
-            final Character[] numbersPlusUnderscore = Arrays.copyOf(numbers, numbers.length + 1);
+            final Character[] numbersPlusUnderscore = Arrays.copyOf(NUMBERS, NUMBERS.length + 1);
             numbersPlusUnderscore[numbersPlusUnderscore.length - 1] = '_';
 
-            AlphabetConverter.createConverterFromChars(englishAndNumbers, numbersPlusUnderscore, numbers);
+            AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, numbersPlusUnderscore, NUMBERS);
         }).isInstanceOf(IllegalArgumentException.class).hasMessage(
-                "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1");
+            "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1");
     }
 
     private void test(final Character[] originalChars, final Character[] encodingChars,
@@ -258,7 +265,7 @@ public class AlphabetConverterTest {
     public void testEqualsWithNull() {
         final Character[] characterArray = new Character[0];
         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, null,
-                null);
+            null);
 
         assertThat(alphabetConverter.equals(null)).isFalse();
     }
@@ -295,13 +302,13 @@ public class AlphabetConverterTest {
                 "Unexpected string without decoding (XX) in " + toDecode);
     }
 
-    /*
+    /**
      * Test constructor from code points
      */
     @Test
     public void unicodeTest() throws UnsupportedEncodingException {
-        final AlphabetConverter ac = AlphabetConverter.createConverter(unicode, lowerCaseEnglishCodepoints,
-                doNotEncodeCodepoints);
+        final AlphabetConverter ac = AlphabetConverter.createConverter(UNICODE, LOWER_CASE_ENGLISH_CODEPOINTS,
+            DO_NOT_ENCODE_CODEPOINTS);
 
         assertThat(ac.getEncodedCharLength()).isEqualTo(2);
 
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
index 3ea582a..6e68957 100644
--- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -30,7 +30,6 @@ import java.io.IOException;
 import java.io.StringWriter;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Modifier;
-import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Paths;
diff --git a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
index 3e26e74..5906da6 100644
--- a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
@@ -20,7 +20,6 @@ import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
diff --git a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
index a8bd26d..4770b05 100644
--- a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
+++ b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
@@ -55,7 +55,7 @@ public class NumericEntityUnescaperTest  {
         String expected = "Test \u0030 not test";
 
         String result = neu.translate(input);
-        assertThat(result).as("Failed to support unfinished entities (i.e. missing semi-colon)").isEqualTo(expected);
+        assertThat(result).as("Failed to support unfinished entities (i.e. missing semicolon)").isEqualTo(expected);
 
         // ignore it
         neu = new NumericEntityUnescaper();
@@ -63,7 +63,7 @@ public class NumericEntityUnescaperTest  {
         expected = input;
 
         result = neu.translate(input);
-        assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semi-colon)").isEqualTo(expected);
+        assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semicolon)").isEqualTo(expected);
 
         // fail it
         neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon);