You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2020/12/20 18:58:55 UTC
[commons-text] branch master updated: Check for null input,
use a ternary expression, refactor a constant, Javadoc.
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push:
new aa7e49d Check for null input, use a ternary expression, refactor a constant, Javadoc.
aa7e49d is described below
commit aa7e49dda44d442ef9dede756c42eb48a79c0ad1
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sun Dec 20 13:58:50 2020 -0500
Check for null input, use a ternary expression, refactor a constant,
Javadoc.
Checkstyle: names, imports.
---
.../text/translate/NumericEntityUnescaper.java | 30 +++----
.../apache/commons/text/AlphabetConverterTest.java | 95 ++++++++++++----------
.../apache/commons/text/StringEscapeUtilsTest.java | 1 -
.../similarity/IntersectionSimilarityTest.java | 1 -
.../text/translate/NumericEntityUnescaperTest.java | 4 +-
5 files changed, 69 insertions(+), 62 deletions(-)
diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
index f9050e7..b1abde5 100644
--- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
+++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
@@ -22,43 +22,49 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
+import org.apache.commons.lang3.ArrayUtils;
+
/**
- * Translate XML numeric entities of the form &#[xX]?\d+;? to
+ * Translates XML numeric entities of the form &#[xX]?\d+;? to
* the specific codepoint.
*
- * Note that the semi-colon is optional.
+ * Note that the semicolon is optional.
*
* @since 1.0
*/
public class NumericEntityUnescaper extends CharSequenceTranslator {
+ /** Default options. */
+ private static final EnumSet<OPTION> DEFAULT_OPTIONS = EnumSet
+ .copyOf(Collections.singletonList(OPTION.semiColonRequired));
+
/** Enumerates NumericEntityUnescaper options for unescaping. */
public enum OPTION {
/**
- * Require a semicolon.
+ * Requires a semicolon.
*/
semiColonRequired,
/**
- * Do not require a semicolon.
+ * Does not require a semicolon.
*/
semiColonOptional,
/**
- * Throw an exception if a semi-colon is missing.
+ * Throws an exception if a semicolon is missing.
*/
errorIfNoSemiColon
}
- /** EnumSet of OPTIONS, given from the constructor. */
+ /** EnumSet of OPTIONS, given from the constructor, read-only. */
private final EnumSet<OPTION> options;
/**
- * Create a UnicodeUnescaper.
+ * Creates a NumericEntityUnescaper.
*
* The constructor takes a list of options, only one type of which is currently
- * available (whether to allow, error or ignore the semi-colon on the end of a
+ * available (whether to allow, error or ignore the semicolon on the end of a
* numeric entity being missing).
*
* For example, to support numeric entities without a ';':
@@ -71,15 +77,11 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
* @param options to apply to this unescaper
*/
public NumericEntityUnescaper(final OPTION... options) {
- if (options.length > 0) {
- this.options = EnumSet.copyOf(Arrays.asList(options));
- } else {
- this.options = EnumSet.copyOf(Collections.singletonList(OPTION.semiColonRequired));
- }
+ this.options = ArrayUtils.isEmpty(options) ? DEFAULT_OPTIONS : EnumSet.copyOf(Arrays.asList(options));
}
/**
- * Whether the passed in option is currently set.
+ * Tests whether the passed in option is currently set.
*
* @param option to check state of
* @return whether the option is set
diff --git a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
index 664b3cf..fb10493 100644
--- a/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
+++ b/src/test/java/org/apache/commons/text/AlphabetConverterTest.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.ArrayUtils;
import org.junit.jupiter.api.Test;
/**
@@ -33,33 +34,39 @@ import org.junit.jupiter.api.Test;
*/
public class AlphabetConverterTest {
- private static final Character[] lowerCaseEnglish = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
- 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
- private static final Character[] englishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c',
- 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
- 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
- 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '};
- private static final Character[] lowerCaseEnglishAndNumbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
- 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
- 'w', 'x', 'y', 'z', ' '};
- private static final Character[] numbers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
- private static final Character[] binary = {'0', '1'};
- private static final Character[] hebrew = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', '\u05dd',
- '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da',
- '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'};
- private static final Character[] empty = {};
-
- private static final Integer[] unicode = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, 99,
- 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005};
- private static final Integer[] lowerCaseEnglishCodepoints = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
- 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122};
- private static final Integer[] doNotEncodeCodepoints = {32, 97, 98, 99}; // space, a, b, c
+ private static final Character[] LOWER_CASE_ENGLISH = {' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
+ 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
+
+ private static final Character[] ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b',
+ 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+ 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+ 'U', 'V', 'W', 'X', 'Y', 'Z', ' '};
+
+ private static final Character[] LOWER_CASE_ENGLISH_AND_NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+ 'w', 'x', 'y', 'z', ' '};
+
+ private static final Character[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
+
+ private static final Character[] BINARY = {'0', '1'};
+
+ private static final Character[] HEBREW = {'_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df',
+ '\u05dd', '\u05e4', '\u05e9', '\u05d3', '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da',
+ '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de', '\u05e6', '\u05ea', '\u05e5'};
+
+ private static final Integer[] UNICODE = {32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98,
+ 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005};
+
+ private static final Integer[] LOWER_CASE_ENGLISH_CODEPOINTS = {32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122};
+
+ private static final Integer[] DO_NOT_ENCODE_CODEPOINTS = {32, 97, 98, 99}; // space, a, b, c
@Test
public void binaryTest() throws UnsupportedEncodingException {
- test(binary, numbers, empty, "0", "1", "10", "11");
- test(numbers, binary, empty, "12345", "0");
- test(lowerCaseEnglish, binary, empty, "abc", "a");
+ test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "0", "1", "10", "11");
+ test(NUMBERS, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "12345", "0");
+ test(LOWER_CASE_ENGLISH, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "abc", "a");
}
private AlphabetConverter createJavadocExample() {
@@ -72,29 +79,29 @@ public class AlphabetConverterTest {
@Test
public void doNotEncodeTest() throws UnsupportedEncodingException {
- test(englishAndNumbers, lowerCaseEnglishAndNumbers, lowerCaseEnglish, "1", "456", "abc", "ABC",
+ test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, "1", "456", "abc", "ABC",
"this will not be converted but THIS WILL");
- test(englishAndNumbers, lowerCaseEnglishAndNumbers, numbers, "1", "456", "abc", "ABC",
+ test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, NUMBERS, "1", "456", "abc", "ABC",
"this will be converted but 12345 and this will be");
}
@Test
public void encodeFailureTest() {
assertThatThrownBy(() -> {
- test(binary, numbers, empty, "3");
+ test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "3");
}).isInstanceOf(UnsupportedEncodingException.class).hasMessage("Couldn't find encoding for '3' in 3");
}
@Test
public void hebrewTest() throws UnsupportedEncodingException {
- test(hebrew, binary, empty, "\u05d0", "\u05e2",
- "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+ test(HEBREW, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
+ "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+ "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
- test(hebrew, numbers, empty, "\u05d0", "\u05e2",
- "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+ test(HEBREW, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
+ "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
+ "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
- test(numbers, hebrew, empty, "123456789", "1", "5");
- test(lowerCaseEnglish, hebrew, empty, "this is a test");
+ test(NUMBERS, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "123456789", "1", "5");
+ test(LOWER_CASE_ENGLISH, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "this is a test");
}
/*
@@ -114,7 +121,7 @@ public class AlphabetConverterTest {
@Test
public void missingDoNotEncodeLettersFromEncodingTest() {
assertThatThrownBy(() -> {
- AlphabetConverter.createConverterFromChars(englishAndNumbers, lowerCaseEnglish, numbers);
+ AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, NUMBERS);
}).isInstanceOf(IllegalArgumentException.class).hasMessage(
"Can not use 'do not encode' list because encoding alphabet does not contain '0'");
}
@@ -122,7 +129,7 @@ public class AlphabetConverterTest {
@Test
public void missingDoNotEncodeLettersFromOriginalTest() {
assertThatThrownBy(() -> {
- AlphabetConverter.createConverterFromChars(lowerCaseEnglish, englishAndNumbers, numbers);
+ AlphabetConverter.createConverterFromChars(LOWER_CASE_ENGLISH, ENGLISH_AND_NUMBERS, NUMBERS);
}).isInstanceOf(IllegalArgumentException.class).hasMessage(
"Can not use 'do not encode' list because original alphabet does not contain '0'");
}
@@ -130,20 +137,20 @@ public class AlphabetConverterTest {
@Test
public void noEncodingLettersTest() {
assertThatThrownBy(() -> {
- AlphabetConverter.createConverterFromChars(englishAndNumbers, numbers, numbers);
+ AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, NUMBERS, NUMBERS);
}).isInstanceOf(IllegalArgumentException.class).hasMessage(
- "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0");
+ "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0");
}
@Test
public void onlyOneEncodingLettersTest() {
assertThatThrownBy(() -> {
- final Character[] numbersPlusUnderscore = Arrays.copyOf(numbers, numbers.length + 1);
+ final Character[] numbersPlusUnderscore = Arrays.copyOf(NUMBERS, NUMBERS.length + 1);
numbersPlusUnderscore[numbersPlusUnderscore.length - 1] = '_';
- AlphabetConverter.createConverterFromChars(englishAndNumbers, numbersPlusUnderscore, numbers);
+ AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, numbersPlusUnderscore, NUMBERS);
}).isInstanceOf(IllegalArgumentException.class).hasMessage(
- "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1");
+ "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1");
}
private void test(final Character[] originalChars, final Character[] encodingChars,
@@ -258,7 +265,7 @@ public class AlphabetConverterTest {
public void testEqualsWithNull() {
final Character[] characterArray = new Character[0];
final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, null,
- null);
+ null);
assertThat(alphabetConverter.equals(null)).isFalse();
}
@@ -295,13 +302,13 @@ public class AlphabetConverterTest {
"Unexpected string without decoding (XX) in " + toDecode);
}
- /*
+ /**
* Test constructor from code points
*/
@Test
public void unicodeTest() throws UnsupportedEncodingException {
- final AlphabetConverter ac = AlphabetConverter.createConverter(unicode, lowerCaseEnglishCodepoints,
- doNotEncodeCodepoints);
+ final AlphabetConverter ac = AlphabetConverter.createConverter(UNICODE, LOWER_CASE_ENGLISH_CODEPOINTS,
+ DO_NOT_ENCODE_CODEPOINTS);
assertThat(ac.getEncodedCharLength()).isEqualTo(2);
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
index 3ea582a..6e68957 100644
--- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -30,7 +30,6 @@ import java.io.IOException;
import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
-import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
diff --git a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
index 3e26e74..5906da6 100644
--- a/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/IntersectionSimilarityTest.java
@@ -20,7 +20,6 @@ import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
diff --git a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
index a8bd26d..4770b05 100644
--- a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
+++ b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java
@@ -55,7 +55,7 @@ public class NumericEntityUnescaperTest {
String expected = "Test \u0030 not test";
String result = neu.translate(input);
- assertThat(result).as("Failed to support unfinished entities (i.e. missing semi-colon)").isEqualTo(expected);
+ assertThat(result).as("Failed to support unfinished entities (i.e. missing semicolon)").isEqualTo(expected);
// ignore it
neu = new NumericEntityUnescaper();
@@ -63,7 +63,7 @@ public class NumericEntityUnescaperTest {
expected = input;
result = neu.translate(input);
- assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semi-colon)").isEqualTo(expected);
+ assertThat(result).as("Failed to ignore unfinished entities (i.e. missing semicolon)").isEqualTo(expected);
// fail it
neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon);