You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2017/02/10 13:17:38 UTC
[25/25] [text] chore: update packages back to
org.apache.commons.text.*
chore: update packages back to org.apache.commons.text.*
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/c7cf533d
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/c7cf533d
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/c7cf533d
Branch: refs/heads/master
Commit: c7cf533d2f1e22624d8210194ade266e97e6f7a0
Parents: 348aa51
Author: Rob Tompkins <ch...@apache.org>
Authored: Fri Feb 10 08:17:04 2017 -0500
Committer: Rob Tompkins <ch...@apache.org>
Committed: Fri Feb 10 08:17:04 2017 -0500
----------------------------------------------------------------------
fb-excludes.xml | 4 +-
.../apache/commons/text/AlphabetConverter.java | 530 +++
.../java/org/apache/commons/text/Builder.java | 88 +
.../apache/commons/text/CharacterPredicate.java | 37 +
.../commons/text/CharacterPredicates.java | 52 +
.../apache/commons/text/CompositeFormat.java | 116 +
.../commons/text/ExtendedMessageFormat.java | 571 ++++
.../org/apache/commons/text/FormatFactory.java | 41 +
.../apache/commons/text/FormattableUtils.java | 156 +
.../org/apache/commons/text/StrBuilder.java | 3092 ++++++++++++++++++
.../java/org/apache/commons/text/StrLookup.java | 180 +
.../org/apache/commons/text/StrMatcher.java | 438 +++
.../org/apache/commons/text/StrSubstitutor.java | 1213 +++++++
.../org/apache/commons/text/StrTokenizer.java | 1118 +++++++
.../apache/commons/text/StringEscapeUtils.java | 959 ++++++
.../commons/text/beta/AlphabetConverter.java | 530 ---
.../org/apache/commons/text/beta/Builder.java | 88 -
.../commons/text/beta/CharacterPredicate.java | 37 -
.../commons/text/beta/CharacterPredicates.java | 52 -
.../commons/text/beta/CompositeFormat.java | 116 -
.../text/beta/ExtendedMessageFormat.java | 571 ----
.../apache/commons/text/beta/FormatFactory.java | 41 -
.../commons/text/beta/FormattableUtils.java | 156 -
.../apache/commons/text/beta/StrBuilder.java | 3092 ------------------
.../org/apache/commons/text/beta/StrLookup.java | 180 -
.../apache/commons/text/beta/StrMatcher.java | 438 ---
.../commons/text/beta/StrSubstitutor.java | 1213 -------
.../apache/commons/text/beta/StrTokenizer.java | 1118 -------
.../commons/text/beta/StringEscapeUtils.java | 959 ------
.../commons/text/beta/diff/CommandVisitor.java | 146 -
.../commons/text/beta/diff/DeleteCommand.java | 56 -
.../commons/text/beta/diff/EditCommand.java | 88 -
.../commons/text/beta/diff/EditScript.java | 133 -
.../commons/text/beta/diff/InsertCommand.java | 58 -
.../commons/text/beta/diff/KeepCommand.java | 58 -
.../text/beta/diff/ReplacementsFinder.java | 124 -
.../text/beta/diff/ReplacementsHandler.java | 52 -
.../text/beta/diff/StringsComparator.java | 331 --
.../commons/text/beta/diff/package-info.java | 25 -
.../apache/commons/text/beta/package-info.java | 22 -
.../text/beta/similarity/CosineDistance.java | 57 -
.../text/beta/similarity/CosineSimilarity.java | 102 -
.../commons/text/beta/similarity/Counter.java | 62 -
.../text/beta/similarity/EditDistance.java | 59 -
.../text/beta/similarity/EditDistanceFrom.java | 112 -
.../text/beta/similarity/FuzzyScore.java | 144 -
.../text/beta/similarity/HammingDistance.java | 78 -
.../text/beta/similarity/JaccardDistance.java | 55 -
.../text/beta/similarity/JaccardSimilarity.java | 88 -
.../beta/similarity/JaroWinklerDistance.java | 157 -
.../similarity/LevenshteinDetailedDistance.java | 519 ---
.../beta/similarity/LevenshteinDistance.java | 396 ---
.../beta/similarity/LevenshteinResults.java | 125 -
.../similarity/LongestCommonSubsequence.java | 144 -
.../LongestCommonSubsequenceDistance.java | 64 -
.../text/beta/similarity/RegexTokenizer.java | 52 -
.../text/beta/similarity/SimilarityScore.java | 63 -
.../beta/similarity/SimilarityScoreFrom.java | 112 -
.../commons/text/beta/similarity/Tokenizer.java | 35 -
.../text/beta/similarity/package-info.java | 44 -
.../beta/translate/AggregateTranslator.java | 65 -
.../beta/translate/CharSequenceTranslator.java | 135 -
.../beta/translate/CodePointTranslator.java | 51 -
.../text/beta/translate/CsvTranslators.java | 82 -
.../text/beta/translate/EntityArrays.java | 445 ---
.../text/beta/translate/JavaUnicodeEscaper.java | 113 -
.../text/beta/translate/LookupTranslator.java | 100 -
.../beta/translate/NumericEntityEscaper.java | 118 -
.../beta/translate/NumericEntityUnescaper.java | 138 -
.../text/beta/translate/OctalUnescaper.java | 79 -
.../beta/translate/SingleLookupTranslator.java | 147 -
.../beta/translate/SinglePassTranslator.java | 54 -
.../text/beta/translate/UnicodeEscaper.java | 137 -
.../text/beta/translate/UnicodeUnescaper.java | 64 -
.../UnicodeUnpairedSurrogateRemover.java | 42 -
.../text/beta/translate/package-info.java | 24 -
.../commons/text/diff/CommandVisitor.java | 146 +
.../apache/commons/text/diff/DeleteCommand.java | 56 +
.../apache/commons/text/diff/EditCommand.java | 88 +
.../apache/commons/text/diff/EditScript.java | 133 +
.../apache/commons/text/diff/InsertCommand.java | 58 +
.../apache/commons/text/diff/KeepCommand.java | 58 +
.../commons/text/diff/ReplacementsFinder.java | 124 +
.../commons/text/diff/ReplacementsHandler.java | 52 +
.../commons/text/diff/StringsComparator.java | 331 ++
.../apache/commons/text/diff/package-info.java | 25 +
.../org/apache/commons/text/package-info.java | 22 +
.../commons/text/similarity/CosineDistance.java | 57 +
.../text/similarity/CosineSimilarity.java | 102 +
.../apache/commons/text/similarity/Counter.java | 62 +
.../commons/text/similarity/EditDistance.java | 59 +
.../text/similarity/EditDistanceFrom.java | 112 +
.../commons/text/similarity/FuzzyScore.java | 144 +
.../text/similarity/HammingDistance.java | 78 +
.../text/similarity/JaccardDistance.java | 55 +
.../text/similarity/JaccardSimilarity.java | 88 +
.../text/similarity/JaroWinklerDistance.java | 157 +
.../similarity/LevenshteinDetailedDistance.java | 519 +++
.../text/similarity/LevenshteinDistance.java | 396 +++
.../text/similarity/LevenshteinResults.java | 125 +
.../similarity/LongestCommonSubsequence.java | 144 +
.../LongestCommonSubsequenceDistance.java | 64 +
.../commons/text/similarity/RegexTokenizer.java | 52 +
.../text/similarity/SimilarityScore.java | 63 +
.../text/similarity/SimilarityScoreFrom.java | 112 +
.../commons/text/similarity/Tokenizer.java | 35 +
.../commons/text/similarity/package-info.java | 44 +
.../text/translate/AggregateTranslator.java | 65 +
.../text/translate/CharSequenceTranslator.java | 135 +
.../text/translate/CodePointTranslator.java | 51 +
.../commons/text/translate/CsvTranslators.java | 82 +
.../commons/text/translate/EntityArrays.java | 445 +++
.../text/translate/JavaUnicodeEscaper.java | 113 +
.../text/translate/LookupTranslator.java | 100 +
.../text/translate/NumericEntityEscaper.java | 118 +
.../text/translate/NumericEntityUnescaper.java | 138 +
.../commons/text/translate/OctalUnescaper.java | 79 +
.../text/translate/SingleLookupTranslator.java | 147 +
.../text/translate/SinglePassTranslator.java | 54 +
.../commons/text/translate/UnicodeEscaper.java | 137 +
.../text/translate/UnicodeUnescaper.java | 64 +
.../UnicodeUnpairedSurrogateRemover.java | 42 +
.../commons/text/translate/package-info.java | 24 +
.../commons/text/AlphabetConverterTest.java | 204 ++
.../commons/text/CharacterPredicatesTest.java | 47 +
.../commons/text/CompositeFormatTest.java | 85 +
.../commons/text/ExtendedMessageFormatTest.java | 497 +++
.../commons/text/FormattableUtilsTest.java | 142 +
.../text/StrBuilderAppendInsertTest.java | 1605 +++++++++
.../org/apache/commons/text/StrBuilderTest.java | 2007 ++++++++++++
.../org/apache/commons/text/StrLookupTest.java | 115 +
.../org/apache/commons/text/StrMatcherTest.java | 214 ++
.../apache/commons/text/StrSubstitutorTest.java | 740 +++++
.../apache/commons/text/StrTokenizerTest.java | 913 ++++++
.../commons/text/StringEscapeUtilsTest.java | 601 ++++
.../text/beta/AlphabetConverterTest.java | 204 --
.../text/beta/CharacterPredicatesTest.java | 47 -
.../commons/text/beta/CompositeFormatTest.java | 85 -
.../text/beta/ExtendedMessageFormatTest.java | 497 ---
.../commons/text/beta/FormattableUtilsTest.java | 142 -
.../text/beta/StrBuilderAppendInsertTest.java | 1605 ---------
.../commons/text/beta/StrBuilderTest.java | 2007 ------------
.../apache/commons/text/beta/StrLookupTest.java | 115 -
.../commons/text/beta/StrMatcherTest.java | 214 --
.../commons/text/beta/StrSubstitutorTest.java | 740 -----
.../commons/text/beta/StrTokenizerTest.java | 914 ------
.../text/beta/StringEscapeUtilsTest.java | 601 ----
.../text/beta/diff/ReplacementsFinderTest.java | 108 -
.../text/beta/diff/StringsComparatorTest.java | 126 -
.../beta/similarity/CosineDistanceTest.java | 70 -
.../text/beta/similarity/FuzzyScoreTest.java | 64 -
.../beta/similarity/HammingDistanceTest.java | 58 -
.../beta/similarity/JaccardDistanceTest.java | 67 -
.../beta/similarity/JaccardSimilarityTest.java | 67 -
.../similarity/JaroWinklerDistanceTest.java | 62 -
.../LevenshteinDetailedDistanceTest.java | 402 ---
.../similarity/LevenshteinDistanceTest.java | 139 -
.../LongestCommonSubsequenceDistanceTest.java | 68 -
.../LongestCommonSubsequenceTest.java | 99 -
.../ParameterizedEditDistanceFromTest.java | 90 -
.../ParameterizedLevenshteinDistanceTest.java | 125 -
.../ParameterizedSimilarityScoreFromTest.java | 81 -
.../beta/similarity/StringMetricFromTest.java | 66 -
.../beta/translate/AggregateTranslatorTest.java | 66 -
.../text/beta/translate/EntityArraysTest.java | 129 -
.../beta/translate/JavaUnicodeEscaperTest.java | 65 -
.../beta/translate/LookupTranslatorTest.java | 57 -
.../translate/NumericEntityEscaperTest.java | 68 -
.../translate/NumericEntityUnescaperTest.java | 80 -
.../text/beta/translate/OctalUnescaperTest.java | 82 -
.../translate/SinglePassTranslatorTest.java | 57 -
.../text/beta/translate/UnicodeEscaperTest.java | 55 -
.../beta/translate/UnicodeUnescaperTest.java | 60 -
.../UnicodeUnpairedSurrogateRemoverTest.java | 47 -
.../text/diff/ReplacementsFinderTest.java | 108 +
.../text/diff/StringsComparatorTest.java | 126 +
.../text/similarity/CosineDistanceTest.java | 70 +
.../commons/text/similarity/FuzzyScoreTest.java | 64 +
.../text/similarity/HammingDistanceTest.java | 58 +
.../text/similarity/JaccardDistanceTest.java | 67 +
.../text/similarity/JaccardSimilarityTest.java | 67 +
.../similarity/JaroWinklerDistanceTest.java | 62 +
.../LevenshteinDetailedDistanceTest.java | 402 +++
.../similarity/LevenshteinDistanceTest.java | 139 +
.../LongestCommonSubsequenceDistanceTest.java | 68 +
.../LongestCommonSubsequenceTest.java | 99 +
.../ParameterizedEditDistanceFromTest.java | 90 +
.../ParameterizedLevenshteinDistanceTest.java | 125 +
.../ParameterizedSimilarityScoreFromTest.java | 81 +
.../text/similarity/StringMetricFromTest.java | 66 +
.../text/translate/AggregateTranslatorTest.java | 66 +
.../text/translate/EntityArraysTest.java | 129 +
.../text/translate/JavaUnicodeEscaperTest.java | 65 +
.../text/translate/LookupTranslatorTest.java | 57 +
.../translate/NumericEntityEscaperTest.java | 68 +
.../translate/NumericEntityUnescaperTest.java | 80 +
.../text/translate/OctalUnescaperTest.java | 82 +
.../translate/SinglePassTranslatorTest.java | 57 +
.../text/translate/UnicodeEscaperTest.java | 55 +
.../text/translate/UnicodeUnescaperTest.java | 60 +
.../UnicodeUnpairedSurrogateRemoverTest.java | 47 +
201 files changed, 23576 insertions(+), 23577 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/fb-excludes.xml
----------------------------------------------------------------------
diff --git a/fb-excludes.xml b/fb-excludes.xml
index 0ad46c4..30cf5d9 100644
--- a/fb-excludes.xml
+++ b/fb-excludes.xml
@@ -18,12 +18,12 @@
<FindBugsFilter>
<Match>
- <Class name="org.apache.commons.text.beta.ExtendedMessageFormat" />
+ <Class name="org.apache.commons.text.ExtendedMessageFormat" />
<Bug code="UR" />
</Match>
<Match>
- <Class name="org.apache.commons.text.beta.StrTokenizer" />
+ <Class name="org.apache.commons.text.StrTokenizer" />
<Method name="clone" />
<Bug code="CN" />
</Match>
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/AlphabetConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/AlphabetConverter.java b/src/main/java/org/apache/commons/text/AlphabetConverter.java
new file mode 100644
index 0000000..27de69a
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/AlphabetConverter.java
@@ -0,0 +1,530 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * <p>
+ * Convert from one alphabet to another, with the possibility of leaving certain
+ * characters unencoded.
+ * </p>
+ *
+ * <p>
+ * The target and 'do not encode' alphabets must be in the Unicode BMP, but the
+ * source alphabet need not be.
+ * </p>
+ *
+ * <p>
+ * The encodings will all be of a fixed length, except for the 'do not encode'
+ * chars, which will be of length 1.
+ * </p>
+ *
+ * <h3>Sample usage</h3>
+ *
+ * <pre>
+ * Character[] originals; // a, b, c, d
+ * Character[] encoding; // 0, 1, d
+ * Character[] doNotEncode; // d
+ *
+ * AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originals,
+ * encoding, doNotEncode);
+ *
+ * ac.encode("a"); // 00
+ * ac.encode("b"); // 01
+ * ac.encode("c"); // 0d
+ * ac.encode("d"); // d
+ * ac.encode("abcd"); // 00010dd
+ * </pre>
+ *
+ * <p>
+ * #ThreadSafe# AlphabetConverter class methods are threadsafe as they do not
+ * change internal state.
+ * </p>
+ *
+ * @since 1.0
+ *
+ */
+public final class AlphabetConverter {
+
+ /**
+ * Map from each original code point to its encoded string.
+ */
+ private final Map<Integer, String> originalToEncoded;
+ /**
+ * Map from each encoded string back to its original letter.
+ */
+ private final Map<String, String> encodedToOriginal;
+ /**
+ * Length of the encoded letter.
+ */
+ private final int encodedLetterLength;
+ /**
+ * Arrow constant, used for converting the object into a string.
+ */
+ private static final String ARROW = " -> ";
+ /**
+ * Line separator, used for converting the object into a string.
+ */
+ private static final String LINE_SEPARATOR =
+ System.getProperty("line.separator");
+
+ /**
+ * Hidden constructor for alphabet converter. Used by static helper methods.
+ *
+ * @param originalToEncoded map from original code point to encoded string
+ * @param encodedToOriginal map from encoded string to original letter
+ * @param encodedLetterLength length of the encoded letter
+ */
+ private AlphabetConverter(final Map<Integer, String> originalToEncoded,
+ final Map<String, String> encodedToOriginal,
+ final int encodedLetterLength) {
+
+ this.originalToEncoded = originalToEncoded;
+ this.encodedToOriginal = encodedToOriginal;
+ this.encodedLetterLength = encodedLetterLength;
+ }
+
+ /**
+ * Encode a given string.
+ *
+ * @param original the string to be encoded
+ * @return the encoded string, {@code null} if the given string is null
+ * @throws UnsupportedEncodingException if chars that are not supported are
+ * encountered
+ */
+ public String encode(final String original)
+ throws UnsupportedEncodingException {
+ if (original == null) {
+ return null;
+ }
+
+ final StringBuilder sb = new StringBuilder();
+
+ for (int i = 0; i < original.length();) {
+ final int codepoint = original.codePointAt(i);
+
+ final String nextLetter = originalToEncoded.get(codepoint);
+
+ if (nextLetter == null) {
+ throw new UnsupportedEncodingException(
+ "Couldn't find encoding for '"
+ + codePointToString(codepoint)
+ + "' in "
+ + original
+ );
+ }
+
+ sb.append(nextLetter);
+
+ i += Character.charCount(codepoint);
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * Decode a given string.
+ *
+ * @param encoded a string that has been encoded using this
+ * AlphabetConverter
+ * @return the decoded string, {@code null} if the given string is null
+ * @throws UnsupportedEncodingException if unexpected characters that
+ * cannot be handled are encountered
+ */
+ public String decode(final String encoded)
+ throws UnsupportedEncodingException {
+ if (encoded == null) {
+ return null;
+ }
+
+ final StringBuilder result = new StringBuilder();
+
+ for (int j = 0; j < encoded.length();) {
+ final Integer i = encoded.codePointAt(j);
+ final String s = codePointToString(i);
+
+ if (s.equals(originalToEncoded.get(i))) {
+ result.append(s);
+ j++; // because we do not encode in Unicode extended the
+ // length of each encoded char is 1
+ } else {
+ if (j + encodedLetterLength > encoded.length()) {
+ throw new UnsupportedEncodingException("Unexpected end "
+ + "of string while decoding " + encoded);
+ }
+ final String nextGroup = encoded.substring(j,
+ j + encodedLetterLength);
+ final String next = encodedToOriginal.get(nextGroup);
+ if (next == null) {
+ throw new UnsupportedEncodingException(
+ "Unexpected string without decoding ("
+ + nextGroup + ") in " + encoded);
+ }
+ result.append(next);
+ j += encodedLetterLength;
+ }
+ }
+
+ return result.toString();
+ }
+
+ /**
+ * Get the number of characters of the encoded alphabet that are needed
+ * to represent each character of the original
+ * alphabet.
+ *
+ * @return the length of the encoded char
+ */
+ public int getEncodedCharLength() {
+ return encodedLetterLength;
+ }
+
+ /**
+ * Get the mapping from integer code point of source language to encoded
+ * string. Use to reconstruct converter from
+ * serialized map.
+ *
+ * @return the original map
+ */
+ public Map<Integer, String> getOriginalToEncoded() {
+ return Collections.unmodifiableMap(originalToEncoded);
+ }
+
+ /**
+ * Recursive method used when creating encoder/decoder.
+ *
+ * @param level at which point it should add a single encoding
+ * @param currentEncoding current encoding
+ * @param encoding letters encoding
+ * @param originals original values
+ * @param doNotEncodeMap map of values that should not be encoded
+ */
+ @SuppressWarnings("PMD")
+ private void addSingleEncoding(final int level,
+ final String currentEncoding,
+ final Collection<Integer> encoding,
+ final Iterator<Integer> originals,
+ final Map<Integer, String> doNotEncodeMap) {
+
+ if (level > 0) {
+ for (final int encodingLetter : encoding) {
+ if (originals.hasNext()) {
+
+ // this skips the doNotEncode chars if they are in the
+ // leftmost place
+ if (level != encodedLetterLength
+ || !doNotEncodeMap.containsKey(encodingLetter)) {
+ addSingleEncoding(level - 1,
+ currentEncoding
+ + codePointToString(encodingLetter),
+ encoding,
+ originals,
+ doNotEncodeMap
+ );
+ }
+ } else {
+ return; // done encoding all the original alphabet
+ }
+ }
+ } else {
+ Integer next = originals.next();
+
+ while (doNotEncodeMap.containsKey(next)) {
+ final String originalLetterAsString = codePointToString(next);
+
+ originalToEncoded.put(next, originalLetterAsString);
+ encodedToOriginal.put(originalLetterAsString,
+ originalLetterAsString);
+
+ if (!originals.hasNext()) {
+ return;
+ }
+
+ next = originals.next();
+ }
+
+ final String originalLetterAsString = codePointToString(next);
+
+ originalToEncoded.put(next, currentEncoding);
+ encodedToOriginal.put(currentEncoding, originalLetterAsString);
+ }
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+
+ for (final Entry<Integer, String> entry
+ : originalToEncoded.entrySet()) {
+ sb.append(codePointToString(entry.getKey()))
+ .append(ARROW)
+ .append(entry.getValue()).append(LINE_SEPARATOR);
+ }
+
+ return sb.toString();
+ }
+
+ @Override
+ public boolean equals(final Object obj) {
+ if (obj == null) {
+ return false;
+ }
+ if (obj == this) {
+ return true;
+ }
+ if (!(obj instanceof AlphabetConverter)) {
+ return false;
+ }
+ final AlphabetConverter other = (AlphabetConverter) obj;
+ return originalToEncoded.equals(other.originalToEncoded)
+ && encodedToOriginal.equals(other.encodedToOriginal)
+ && encodedLetterLength == other.encodedLetterLength;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(originalToEncoded,
+ encodedToOriginal,
+ encodedLetterLength);
+ }
+
+ // -- static methods
+
+ /**
+ * Create a new converter from a map.
+ *
+ * @param originalToEncoded a map returned from getOriginalToEncoded()
+ * @return the reconstructed AlphabetConverter
+ * @see AlphabetConverter#getOriginalToEncoded()
+ */
+ public static AlphabetConverter createConverterFromMap(
+ final Map<Integer, String> originalToEncoded) {
+ final Map<Integer, String> unmodifiableOriginalToEncoded =
+ Collections.unmodifiableMap(originalToEncoded);
+ final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
+ final Map<Integer, String> doNotEncodeMap = new HashMap<>();
+
+ int encodedLetterLength = 1;
+
+ for (final Entry<Integer, String> e
+ : unmodifiableOriginalToEncoded.entrySet()) {
+ final String originalAsString = codePointToString(e.getKey());
+ encodedToOriginal.put(e.getValue(), originalAsString);
+
+ if (e.getValue().equals(originalAsString)) {
+ doNotEncodeMap.put(e.getKey(), e.getValue());
+ }
+
+ if (e.getValue().length() > encodedLetterLength) {
+ encodedLetterLength = e.getValue().length();
+ }
+ }
+
+ return new AlphabetConverter(unmodifiableOriginalToEncoded,
+ encodedToOriginal,
+ encodedLetterLength);
+ }
+
+ /**
+ * Create an alphabet converter, for converting from the original alphabet,
+ * to the encoded alphabet, while leaving the characters in
+ * <em>doNotEncode</em> as they are (if possible).
+ *
+ * <p>Duplicate letters in either original or encoding will be ignored.</p>
+ *
+ * @param original an array of chars representing the original alphabet
+ * @param encoding an array of chars representing the alphabet to be used
+ * for encoding
+ * @param doNotEncode an array of chars to be encoded using the original
+ * alphabet - every char here must appear in
+ * both the previous params
+ * @return the AlphabetConverter
+ * @throws IllegalArgumentException if an AlphabetConverter cannot be
+ * constructed
+ */
+ public static AlphabetConverter createConverterFromChars(
+ final Character[] original,
+ final Character[] encoding,
+ final Character[] doNotEncode) {
+ return AlphabetConverter.createConverter(
+ convertCharsToIntegers(original),
+ convertCharsToIntegers(encoding),
+ convertCharsToIntegers(doNotEncode));
+ }
+
+ /**
+ * Convert characters to integers.
+ *
+ * @param chars array of characters
+ * @return an equivalent array of integers
+ */
+ private static Integer[] convertCharsToIntegers(final Character[] chars) {
+ if (chars == null || chars.length == 0) {
+ return new Integer[0];
+ }
+ final Integer[] integers = new Integer[chars.length];
+ for (int i = 0; i < chars.length; i++) {
+ integers[i] = (int) chars[i];
+ }
+ return integers;
+ }
+
+ /**
+ * Create an alphabet converter, for converting from the original alphabet,
+ * to the encoded alphabet, while leaving
+ * the characters in <em>doNotEncode</em> as they are (if possible).
+ *
+ * <p>Duplicate letters in either original or encoding will be ignored.</p>
+ *
+ * @param original an array of ints representing the original alphabet in
+ * codepoints
+ * @param encoding an array of ints representing the alphabet to be used for
+ * encoding, in codepoints
+ * @param doNotEncode an array of ints representing the chars to be encoded
+ * using the original alphabet - every char
+ * here must appear in both the previous params
+ * @return the AlphabetConverter
+ * @throws IllegalArgumentException if an AlphabetConverter cannot be
+ * constructed
+ */
+ public static AlphabetConverter createConverter(
+ final Integer[] original,
+ final Integer[] encoding,
+ final Integer[] doNotEncode) {
+ final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.<Integer> asList(original));
+ final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.<Integer> asList(encoding));
+ final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.<Integer> asList(doNotEncode));
+
+ final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
+ final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
+ final Map<Integer, String> doNotEncodeMap = new HashMap<>();
+
+ int encodedLetterLength;
+
+ for (final int i : doNotEncodeCopy) {
+ if (!originalCopy.contains(i)) {
+ throw new IllegalArgumentException(
+ "Can not use 'do not encode' list because original "
+ + "alphabet does not contain '"
+ + codePointToString(i) + "'");
+ }
+
+ if (!encodingCopy.contains(i)) {
+ throw new IllegalArgumentException(
+ "Can not use 'do not encode' list because encoding alphabet does not contain '"
+ + codePointToString(i) + "'");
+ }
+
+ doNotEncodeMap.put(i, codePointToString(i));
+ }
+
+ if (encodingCopy.size() >= originalCopy.size()) {
+ encodedLetterLength = 1;
+
+ final Iterator<Integer> it = encodingCopy.iterator();
+
+ for (final int originalLetter : originalCopy) {
+ final String originalLetterAsString =
+ codePointToString(originalLetter);
+
+ if (doNotEncodeMap.containsKey(originalLetter)) {
+ originalToEncoded.put(originalLetter,
+ originalLetterAsString);
+ encodedToOriginal.put(originalLetterAsString,
+ originalLetterAsString);
+ } else {
+ Integer next = it.next();
+
+ while (doNotEncodeCopy.contains(next)) {
+ next = it.next();
+ }
+
+ final String encodedLetter = codePointToString(next);
+
+ originalToEncoded.put(originalLetter, encodedLetter);
+ encodedToOriginal.put(encodedLetter,
+ originalLetterAsString);
+ }
+ }
+
+ return new AlphabetConverter(originalToEncoded,
+ encodedToOriginal,
+ encodedLetterLength);
+
+ } else if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
+ throw new IllegalArgumentException(
+ "Must have at least two encoding characters (excluding "
+ + "those in the 'do not encode' list), but has "
+ + (encodingCopy.size() - doNotEncodeCopy.size()));
+ } else {
+ // we start with one which is our minimum, and because we do the
+ // first division outside the loop
+ int lettersSoFar = 1;
+
+ // the first division takes into account that the doNotEncode
+ // letters can't be in the leftmost place
+ int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
+ / (encodingCopy.size() - doNotEncodeCopy.size());
+
+ while (lettersLeft / encodingCopy.size() >= 1) {
+ lettersLeft = lettersLeft / encodingCopy.size();
+ lettersSoFar++;
+ }
+
+ encodedLetterLength = lettersSoFar + 1;
+
+ final AlphabetConverter ac =
+ new AlphabetConverter(originalToEncoded,
+ encodedToOriginal,
+ encodedLetterLength);
+
+ ac.addSingleEncoding(encodedLetterLength,
+ "",
+ encodingCopy,
+ originalCopy.iterator(),
+ doNotEncodeMap);
+
+ return ac;
+ }
+ }
+
+ /**
+ * Create new String that contains just the given code point.
+ *
+ * @param i code point
+ * @return a new string with the new code point
+ * @see "http://www.oracle.com/us/technologies/java/supplementary-142654.html"
+ */
+ private static String codePointToString(final int i) {
+ if (Character.charCount(i) == 1) {
+ return String.valueOf((char) i);
+ }
+ return new String(Character.toChars(i));
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/Builder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/Builder.java b/src/main/java/org/apache/commons/text/Builder.java
new file mode 100644
index 0000000..c2c435c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/Builder.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+/**
+ * <p>
+ * The Builder interface is designed to designate a class as a <em>builder</em>
+ * object in the Builder design pattern. Builders are capable of creating and
+ * configuring objects or results that normally take multiple steps to construct
+ * or are very complex to derive.
+ * </p>
+ *
+ * <p>
+ * The builder interface defines a single method, {@link #build()}, that
+ * classes must implement. The result of this method should be the final
+ * configured object or result after all building operations are performed.
+ * </p>
+ *
+ * <p>
+ * It is a recommended practice that the methods supplied to configure the
+ * object or result being built return a reference to {@code this} so that
+ * method calls can be chained together.
+ * </p>
+ *
+ * <p>
+ * Example Builder:
+ * <pre><code>
+ * class FontBuilder implements Builder&lt;Font&gt; {
+ * private Font font;
+ *
+ * public FontBuilder(String fontName) {
+ * this.font = new Font(fontName, Font.PLAIN, 12);
+ * }
+ *
+ * public FontBuilder bold() {
+ * this.font = this.font.deriveFont(Font.BOLD);
+ * return this; // Reference returned so calls can be chained
+ * }
+ *
+ * public FontBuilder size(float pointSize) {
+ * this.font = this.font.deriveFont(pointSize);
+ * return this; // Reference returned so calls can be chained
+ * }
+ *
+ * // Other Font construction methods
+ *
+ * public Font build() {
+ * return this.font;
+ * }
+ * }
+ * </code></pre>
+ *
+ * Example Builder Usage:
+ * <pre><code>
+ * Font bold14ptSansSerifFont = new FontBuilder(Font.SANS_SERIF).bold()
+ * .size(14.0f)
+ * .build();
+ * </code></pre>
+ *
+ *
+ * @param <T> the type of object that the builder will construct or compute.
+ * @since 1.0
+ *
+ */
+public interface Builder<T> {
+
+ /**
+ * Returns a reference to the object being constructed or result being
+ * calculated by the builder.
+ *
+ * @return the object constructed or result calculated by the builder.
+ */
+ T build();
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/CharacterPredicate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/CharacterPredicate.java b/src/main/java/org/apache/commons/text/CharacterPredicate.java
new file mode 100644
index 0000000..164432b
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/CharacterPredicate.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+/**
+ * A predicate for selecting code points. Implementations of this interface must
+ * be thread safe.
+ *
+ * @since 1.0
+ */
+public interface CharacterPredicate {
+
+ /**
+ * Tests the code point with this predicate.
+ *
+ * @param codePoint
+ * the code point to test
+ * @return {@code true} if the code point matches the predicate,
+ * {@code false} otherwise
+ * @since 1.0
+ */
+ boolean test(int codePoint);
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/CharacterPredicates.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/CharacterPredicates.java b/src/main/java/org/apache/commons/text/CharacterPredicates.java
new file mode 100644
index 0000000..a4544de
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/CharacterPredicates.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+/**
+ * <p>
+ * Commonly used implementations of {@link CharacterPredicate}. Per the interface
+ * requirements, all implementations are thread safe.
+ * </p>
+ *
+ * @since 1.0
+ */
+public enum CharacterPredicates implements CharacterPredicate {
+
+ /**
+ * Tests code points against {@link Character#isLetter(int)}.
+ *
+ * @since 1.0
+ */
+ LETTERS {
+ @Override
+ public boolean test(int codePoint) {
+ return Character.isLetter(codePoint);
+ }
+ },
+
+ /**
+ * Tests code points against {@link Character#isDigit(int)}.
+ *
+ * @since 1.0
+ */
+ DIGITS {
+ @Override
+ public boolean test(int codePoint) {
+ return Character.isDigit(codePoint);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/CompositeFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/CompositeFormat.java b/src/main/java/org/apache/commons/text/CompositeFormat.java
new file mode 100644
index 0000000..bbe5754
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/CompositeFormat.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParseException;
+import java.text.ParsePosition;
+
+/**
+ * Formats using one formatter and parses using a different formatter. An
+ * example of use for this would be a webapp where data is taken in one way and
+ * stored in a database another way.
+ *
+ * @since 1.0
+ */
+public class CompositeFormat extends Format {
+
+ /**
+ * Required for serialization support.
+ *
+ * @see java.io.Serializable
+ */
+ private static final long serialVersionUID = -4329119827877627683L;
+
+ /** The parser to use. */
+ private final Format parser;
+ /** The formatter to use. */
+ private final Format formatter;
+
+ /**
+ * Create a format that points its parseObject method to one implementation
+ * and its format method to another.
+ *
+ * @param parser the Format that parseObject calls are delegated to
+ * @param formatter the Format that format calls are delegated to
+ */
+ public CompositeFormat(final Format parser, final Format formatter) {
+ this.parser = parser;
+ this.formatter = formatter;
+ }
+
+ /**
+ * Uses the formatter Format instance.
+ *
+ * @param obj the object to format
+ * @param toAppendTo the {@link StringBuffer} to append to
+ * @param pos the FieldPosition to use (or ignore).
+ * @return <code>toAppendTo</code>
+ * @see Format#format(Object, StringBuffer, FieldPosition)
+ */
+ @Override // Therefore has to use StringBuffer
+ public StringBuffer format(final Object obj, final StringBuffer toAppendTo,
+ final FieldPosition pos) {
+ return formatter.format(obj, toAppendTo, pos);
+ }
+
+ /**
+ * Uses the parser Format instance.
+ *
+ * @param source the String source
+ * @param pos the ParsePosition containing the position to parse from, will
+ * be updated according to parsing success (index) or failure
+ * (error index)
+ * @return the parsed Object
+ * @see Format#parseObject(String, ParsePosition)
+ */
+ @Override
+ public Object parseObject(final String source, final ParsePosition pos) {
+ return parser.parseObject(source, pos);
+ }
+
+ /**
+ * Provides access to the parser Format implementation.
+ *
+ * @return parser Format implementation
+ */
+ public Format getParser() {
+ return this.parser;
+ }
+
+ /**
+ * Provides access to the formatter Format implementation.
+ *
+ * @return formatter Format implementation
+ */
+ public Format getFormatter() {
+ return this.formatter;
+ }
+
+ /**
+ * Utility method to parse and then reformat a String.
+ *
+ * @param input String to reformat
+ * @return A reformatted String
+ * @throws ParseException thrown by parseObject(String) call
+ */
+ public String reformat(final String input) throws ParseException {
+ return format(parseObject(input));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java b/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java
new file mode 100644
index 0000000..2a08bbf
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java
@@ -0,0 +1,571 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.text.Format;
+import java.text.MessageFormat;
+import java.text.ParsePosition;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Extends <code>java.text.MessageFormat</code> to allow pluggable/additional formatting
+ * options for embedded format elements. Client code should specify a registry
+ * of <code>FormatFactory</code> instances associated with <code>String</code>
+ * format names. This registry will be consulted when the format elements are
+ * parsed from the message pattern. In this way custom patterns can be specified,
+ * and the formats supported by <code>java.text.MessageFormat</code> can be overridden
+ * at the format and/or format style level (see MessageFormat). A "format element"
+ * embedded in the message pattern is specified (<b>()?</b> signifies optionality):<br>
+ * <code>{</code><i>argument-number</i><b>(</b><code>,</code><i>format-name</i><b>
+ * (</b><code>,</code><i>format-style</i><b>)?)?</b><code>}</code>
+ *
+ * <p>
+ * <i>format-name</i> and <i>format-style</i> values are trimmed of surrounding whitespace
+ * in the manner of <code>java.text.MessageFormat</code>. If <i>format-name</i> denotes
+ * <code>FormatFactory formatFactoryInstance</code> in <code>registry</code>, a <code>Format</code>
+ * matching <i>format-name</i> and <i>format-style</i> is requested from
+ * <code>formatFactoryInstance</code>. If this is successful, the <code>Format</code>
+ * found is used for this format element.
+ * </p>
+ *
+ * <p><b>NOTICE:</b> The various subformat mutator methods are considered unnecessary; they exist on the parent
+ * class to allow the type of customization which it is the job of this class to provide in
+ * a configurable fashion. These methods have thus been disabled and will throw
+ * <code>UnsupportedOperationException</code> if called.
+ * </p>
+ *
+ * <p>Limitations inherited from <code>java.text.MessageFormat</code>:</p>
+ * <ul>
+ * <li>When using "choice" subformats, support for nested formatting instructions is limited
+ * to that provided by the base class.</li>
+ * <li>Thread-safety of <code>Format</code>s, including <code>MessageFormat</code> and thus
+ * <code>ExtendedMessageFormat</code>, is not guaranteed.</li>
+ * </ul>
+ *
+ * @since 1.0
+ */
+public class ExtendedMessageFormat extends MessageFormat {
+
+ /**
+ * Serialization version identifier.
+ */
+ private static final long serialVersionUID = -2362048321261811743L;
+
+ /**
+ * The multiplier applied to the running hash when combining fields in {@link #hashCode()}.
+ */
+ private static final int HASH_SEED = 31;
+
+ /**
+ * The empty pattern handed to the superclass constructor before the real pattern is applied.
+ */
+ private static final String DUMMY_PATTERN = "";
+
+ /**
+ * A comma; separates the argument index, format name and format arguments.
+ */
+ private static final char START_FMT = ',';
+
+ /**
+ * A right curly brace; ends a format element.
+ */
+ private static final char END_FE = '}';
+
+ /**
+ * A left curly brace; starts a format element.
+ */
+ private static final char START_FE = '{';
+
+ /**
+ * A single quote; delimits quoted text within a pattern.
+ */
+ private static final char QUOTE = '\'';
+
+ /**
+ * The pattern string reported by {@link #toPattern()}.
+ */
+ private String toPattern;
+
+ /**
+ * The registry of {@link FormatFactory} instances keyed by format name; may be null.
+ */
+ private final Map<String, ? extends FormatFactory> registry;
+
+ /**
+ * Create a new ExtendedMessageFormat for the default locale.
+ *
+ * @param pattern the pattern to use, not null
+ * @throws IllegalArgumentException in case of a bad pattern.
+ */
+ public ExtendedMessageFormat(final String pattern) {
+ this(pattern, Locale.getDefault());
+ }
+
+ /**
+ * Create a new ExtendedMessageFormat.
+ *
+ * @param pattern the pattern to use, not null
+ * @param locale the locale to use, not null
+ * @throws IllegalArgumentException in case of a bad pattern.
+ */
+ public ExtendedMessageFormat(final String pattern, final Locale locale) {
+ this(pattern, locale, null);
+ }
+
+ /**
+ * Create a new ExtendedMessageFormat for the default locale.
+ *
+ * @param pattern the pattern to use, not null
+ * @param registry the registry of format factories, may be null
+ * @throws IllegalArgumentException in case of a bad pattern.
+ */
+ public ExtendedMessageFormat(final String pattern,
+ final Map<String, ? extends FormatFactory> registry) {
+ this(pattern, Locale.getDefault(), registry);
+ }
+
+ /**
+ * Create a new ExtendedMessageFormat.
+ *
+ * @param pattern the pattern to use, not null
+ * @param locale the locale to use, not null
+ * @param registry the registry of format factories, may be null
+ * @throws IllegalArgumentException in case of a bad pattern.
+ */
+ public ExtendedMessageFormat(final String pattern,
+ final Locale locale,
+ final Map<String, ? extends FormatFactory> registry) {
+ super(DUMMY_PATTERN);
+ setLocale(locale);
+ this.registry = registry;
+ applyPattern(pattern);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String toPattern() {
+ return toPattern;
+ }
+
+ /**
+ * Apply the specified pattern.
+ *
+ * @param pattern the pattern to apply, scanned for registry formats when a registry is set
+ */
+ @Override
+ public final void applyPattern(final String pattern) {
+ if (registry == null) {
+ super.applyPattern(pattern); // no registry: defer entirely to MessageFormat
+ toPattern = super.toPattern();
+ return;
+ }
+ final ArrayList<Format> foundFormats = new ArrayList<>();
+ final ArrayList<String> foundDescriptions = new ArrayList<>();
+ final StringBuilder stripCustom = new StringBuilder(pattern.length());
+
+ final ParsePosition pos = new ParsePosition(0);
+ final char[] c = pattern.toCharArray();
+ int fmtCount = 0;
+ while (pos.getIndex() < pattern.length()) {
+ switch (c[pos.getIndex()]) {
+ case QUOTE:
+ appendQuotedString(pattern, pos, stripCustom);
+ break;
+ case START_FE:
+ fmtCount++;
+ seekNonWs(pattern, pos);
+ final int start = pos.getIndex();
+ final int index = readArgumentIndex(pattern, next(pos));
+ stripCustom.append(START_FE).append(index);
+ seekNonWs(pattern, pos);
+ Format format = null;
+ String formatDescription = null;
+ if (c[pos.getIndex()] == START_FMT) {
+ formatDescription = parseFormatDescription(pattern,
+ next(pos));
+ format = getFormat(formatDescription);
+ if (format == null) { // no custom format found: leave the description for MessageFormat
+ stripCustom.append(START_FMT).append(formatDescription);
+ }
+ }
+ foundFormats.add(format);
+ foundDescriptions.add(format == null ? null : formatDescription);
+ if (foundFormats.size() != fmtCount) {
+ throw new IllegalArgumentException("The validated expression is false");
+ }
+ if (foundDescriptions.size() != fmtCount) {
+ throw new IllegalArgumentException("The validated expression is false");
+ }
+ if (c[pos.getIndex()] != END_FE) {
+ throw new IllegalArgumentException(
+ "Unreadable format element at position " + start);
+ }
+ //$FALL-THROUGH$
+ default:
+ stripCustom.append(c[pos.getIndex()]);
+ next(pos);
+ }
+ }
+ super.applyPattern(stripCustom.toString()); // pattern with the custom format descriptions removed
+ toPattern = insertFormats(super.toPattern(), foundDescriptions);
+ if (containsElements(foundFormats)) {
+ final Format[] origFormats = getFormats();
+ // only loop over what we know we have, as MessageFormat on Java 1.3
+ // seems to provide an extra format element:
+ int i = 0;
+ for (final Iterator<Format> it = foundFormats.iterator(); it.hasNext(); i++) {
+ final Format f = it.next();
+ if (f != null) {
+ origFormats[i] = f;
+ }
+ }
+ super.setFormats(origFormats);
+ }
+ }
+
+ /**
+ * Throws UnsupportedOperationException - see class Javadoc for details.
+ *
+ * @param formatElementIndex format element index
+ * @param newFormat the new format
+ * @throws UnsupportedOperationException always thrown since this isn't
+ * supported by ExtendedMessageFormat
+ */
+ @Override
+ public void setFormat(final int formatElementIndex, final Format newFormat) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Throws UnsupportedOperationException - see class Javadoc for details.
+ *
+ * @param argumentIndex argument index
+ * @param newFormat the new format
+ * @throws UnsupportedOperationException always thrown since this isn't
+ * supported by ExtendedMessageFormat
+ */
+ @Override
+ public void setFormatByArgumentIndex(final int argumentIndex,
+ final Format newFormat) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Throws UnsupportedOperationException - see class Javadoc for details.
+ *
+ * @param newFormats new formats
+ * @throws UnsupportedOperationException always thrown since this isn't
+ * supported by ExtendedMessageFormat
+ */
+ @Override
+ public void setFormats(final Format[] newFormats) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Throws UnsupportedOperationException - see class Javadoc for details.
+ *
+ * @param newFormats new formats
+ * @throws UnsupportedOperationException always thrown since this isn't
+ * supported by ExtendedMessageFormat
+ */
+ @Override
+ public void setFormatsByArgumentIndex(final Format[] newFormats) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Check if this extended message format is equal to another object.
+ *
+ * @param obj the object to compare to
+ * @return true if this object equals the other, otherwise false
+ */
+ @Override
+ public boolean equals(final Object obj) {
+ if (obj == this) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (!super.equals(obj)) {
+ return false;
+ }
+ if (!Objects.equals(getClass(), obj.getClass())) {
+ return false;
+ }
+ final ExtendedMessageFormat rhs = (ExtendedMessageFormat) obj;
+ if (!Objects.equals(toPattern, rhs.toPattern)) {
+ return false;
+ }
+ if (!Objects.equals(registry, rhs.registry)) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = HASH_SEED * result + Objects.hashCode(registry);
+ result = HASH_SEED * result + Objects.hashCode(toPattern);
+ return result;
+ }
+
+ /**
+ * Get a custom format from a format description.
+ *
+ * @param desc the format description: a format name, optionally followed by a comma and arguments
+ * @return the Format from the registered factory, or null if there is no registry or no factory for the name
+ */
+ private Format getFormat(final String desc) {
+ if (registry != null) {
+ String name = desc;
+ String args = null;
+ final int i = desc.indexOf(START_FMT);
+ if (i > 0) {
+ name = desc.substring(0, i).trim();
+ args = desc.substring(i + 1).trim();
+ }
+ final FormatFactory factory = registry.get(name);
+ if (factory != null) {
+ return factory.getFormat(name, args, getLocale());
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Read the argument index from the current format element.
+ *
+ * @param pattern pattern to parse
+ * @param pos current parse position, left on the ',' or '}' that ends the index
+ * @return argument index
+ */
+ private int readArgumentIndex(final String pattern, final ParsePosition pos) {
+ final int start = pos.getIndex();
+ seekNonWs(pattern, pos);
+ final StringBuilder result = new StringBuilder();
+ boolean error = false;
+ for (; !error && pos.getIndex() < pattern.length(); next(pos)) {
+ char c = pattern.charAt(pos.getIndex());
+ if (Character.isWhitespace(c)) {
+ seekNonWs(pattern, pos);
+ c = pattern.charAt(pos.getIndex()); // NOTE(review): seekNonWs can leave pos at pattern.length(), so this may throw on a pattern ending in whitespace - confirm
+ if (c != START_FMT && c != END_FE) {
+ error = true;
+ continue;
+ }
+ }
+ if ((c == START_FMT || c == END_FE) && result.length() > 0) {
+ try {
+ return Integer.parseInt(result.toString());
+ } catch (final NumberFormatException e) { // NOPMD
+ // we've already ensured only digits, so unless something
+ // outlandishly large was specified we should be okay.
+ }
+ }
+ error = !Character.isDigit(c);
+ result.append(c);
+ }
+ if (error) {
+ throw new IllegalArgumentException(
+ "Invalid format argument index at position " + start + ": "
+ + pattern.substring(start, pos.getIndex()));
+ }
+ throw new IllegalArgumentException(
+ "Unterminated format element at position " + start);
+ }
+
+ /**
+ * Parse the format component of a format element.
+ *
+ * @param pattern string to parse
+ * @param pos current parse position
+ * @return Format description String
+ */
+ private String parseFormatDescription(final String pattern, final ParsePosition pos) {
+ final int start = pos.getIndex();
+ seekNonWs(pattern, pos);
+ final int text = pos.getIndex();
+ int depth = 1;
+ for (; pos.getIndex() < pattern.length(); next(pos)) {
+ switch (pattern.charAt(pos.getIndex())) {
+ case START_FE:
+ depth++;
+ break;
+ case END_FE:
+ depth--;
+ if (depth == 0) {
+ return pattern.substring(text, pos.getIndex());
+ }
+ break;
+ case QUOTE:
+ getQuotedString(pattern, pos);
+ break;
+ default:
+ break;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Unterminated format element at position " + start);
+ }
+
+ /**
+ * Insert formats back into the pattern for toPattern() support.
+ *
+ * @param pattern source
+ * @param customPatterns The custom patterns to re-insert, if any
+ * @return full pattern
+ */
+ private String insertFormats(final String pattern, final ArrayList<String> customPatterns) {
+ if (!containsElements(customPatterns)) {
+ return pattern;
+ }
+ final StringBuilder sb = new StringBuilder(pattern.length() * 2);
+ final ParsePosition pos = new ParsePosition(0);
+ int fe = -1;
+ int depth = 0;
+ while (pos.getIndex() < pattern.length()) {
+ final char c = pattern.charAt(pos.getIndex());
+ switch (c) {
+ case QUOTE:
+ appendQuotedString(pattern, pos, sb);
+ break;
+ case START_FE:
+ depth++;
+ sb.append(START_FE).append(readArgumentIndex(pattern, next(pos)));
+ // do not look for custom patterns when they are embedded, e.g. in a choice
+ if (depth == 1) {
+ fe++;
+ final String customPattern = customPatterns.get(fe);
+ if (customPattern != null) {
+ sb.append(START_FMT).append(customPattern);
+ }
+ }
+ break;
+ case END_FE:
+ depth--;
+ //$FALL-THROUGH$
+ default:
+ sb.append(c);
+ next(pos);
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Consume whitespace from the current parse position.
+ *
+ * @param pattern String to read
+ * @param pos current position
+ */
+ private void seekNonWs(final String pattern, final ParsePosition pos) {
+ int len = 0;
+ final char[] buffer = pattern.toCharArray();
+ do {
+ len = StrMatcher.splitMatcher().isMatch(buffer, pos.getIndex());
+ pos.setIndex(pos.getIndex() + len);
+ } while (len > 0 && pos.getIndex() < pattern.length());
+ }
+
+ /**
+ * Convenience method to advance parse position by 1.
+ *
+ * @param pos the parse position to advance; its index is modified in place
+ * @return <code>pos</code>
+ */
+ private ParsePosition next(final ParsePosition pos) {
+ pos.setIndex(pos.getIndex() + 1);
+ return pos;
+ }
+
+ /**
+ * Consume a quoted string, adding it to <code>appendTo</code> if
+ * specified.
+ *
+ * @param pattern pattern to parse
+ * @param pos current parse position
+ * @param appendTo optional StringBuilder to append
+ * @return <code>appendTo</code>
+ */
+ private StringBuilder appendQuotedString(final String pattern, final ParsePosition pos,
+ final StringBuilder appendTo) {
+ assert pattern.toCharArray()[pos.getIndex()] == QUOTE
+ : "Quoted string must start with quote character";
+
+ // handle quote character at the beginning of the string
+ if (appendTo != null) {
+ appendTo.append(QUOTE);
+ }
+ next(pos);
+
+ final int start = pos.getIndex();
+ final char[] c = pattern.toCharArray();
+ final int lastHold = start;
+ for (int i = pos.getIndex(); i < pattern.length(); i++) {
+ switch (c[pos.getIndex()]) {
+ case QUOTE:
+ next(pos);
+ return appendTo == null ? null : appendTo.append(c, lastHold,
+ pos.getIndex() - lastHold);
+ default:
+ next(pos);
+ }
+ }
+ throw new IllegalArgumentException(
+ "Unterminated quoted string at position " + start);
+ }
+
+ /**
+ * Consume quoted string only.
+ *
+ * @param pattern pattern to parse
+ * @param pos current parse position
+ */
+ private void getQuotedString(final String pattern, final ParsePosition pos) {
+ appendQuotedString(pattern, pos, null);
+ }
+
+ /**
+ * Learn whether the specified Collection contains non-null elements.
+ * @param coll to check
+ * @return <code>true</code> if some Object was found, <code>false</code> otherwise.
+ */
+ private boolean containsElements(final Collection<?> coll) {
+ if (coll == null || coll.isEmpty()) {
+ return false;
+ }
+ for (final Object name : coll) {
+ if (name != null) {
+ return true;
+ }
+ }
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/FormatFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/FormatFactory.java b/src/main/java/org/apache/commons/text/FormatFactory.java
new file mode 100644
index 0000000..ceee01b
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/FormatFactory.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.text.Format;
+import java.util.Locale;
+
+/**
+ * Creates or retrieves {@link Format} instances for a format type name, arguments and locale.
+ *
+ * @since 1.0
+ */
+public interface FormatFactory {
+
+ /**
+ * Create or retrieve a format instance.
+ *
+ * @param name The format type name
+ * @param arguments Arguments used to create the format instance. This allows the
+ * <code>FormatFactory</code> to implement the "format style"
+ * concept from <code>java.text.MessageFormat</code>.
+ * @param locale The locale, may be null
+ * @return The format instance
+ */
+ Format getFormat(String name, String arguments, Locale locale);
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/FormattableUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/FormattableUtils.java b/src/main/java/org/apache/commons/text/FormattableUtils.java
new file mode 100644
index 0000000..043cbf1
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/FormattableUtils.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.Formattable;
+import java.util.Formatter;
+
+import static java.util.FormattableFlags.LEFT_JUSTIFY;
+
+/**
+ * <p>Provides utilities for working with the {@code Formattable} interface.</p>
+ *
+ * <p>The {@link Formattable} interface provides basic control over formatting
+ * when using a {@code Formatter}. It is primarily concerned with numeric precision
+ * and padding, and is not designed to allow generalised alternate formats.</p>
+ *
+ * <p>All {@code append} overloads write the resulting text to the formatter
+ * literally: a {@code '%'} contained in the text, the ellipsis or the pad
+ * character is output as-is and never interpreted as a format specifier.</p>
+ *
+ * @since 1.0
+ *
+ */
+public class FormattableUtils {
+
+    /**
+     * A format that simply outputs the value as a string.
+     */
+    private static final String SIMPLEST_FORMAT = "%s";
+
+    /**
+     * <p>{@code FormattableUtils} instances should NOT be constructed in
+     * standard programming. Instead, the methods of the class should be invoked
+     * statically.</p>
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public FormattableUtils() {
+        super();
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Get the default formatted representation of the specified
+     * {@code Formattable}.
+     *
+     * @param formattable  the instance to convert to a string, not null
+     * @return the resulting string, not null
+     */
+    public static String toString(final Formattable formattable) {
+        return String.format(SIMPLEST_FORMAT, formattable);
+    }
+
+    /**
+     * Handles the common {@code Formattable} operations of truncate-pad-append,
+     * with no ellipsis on precision overflow, and padding width underflow with
+     * spaces.
+     *
+     * @param seq  the string to handle, not null
+     * @param formatter  the destination formatter, not null
+     * @param flags  the flags for formatting, see {@code Formattable}
+     * @param width  the width of the output, see {@code Formattable}
+     * @param precision  the precision of the output, see {@code Formattable}
+     * @return the {@code formatter} instance, not null
+     */
+    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
+            final int precision) {
+        return append(seq, formatter, flags, width, precision, ' ', null);
+    }
+
+    /**
+     * Handles the common {@link Formattable} operations of truncate-pad-append,
+     * with no ellipsis on precision overflow.
+     *
+     * @param seq  the string to handle, not null
+     * @param formatter  the destination formatter, not null
+     * @param flags  the flags for formatting, see {@code Formattable}
+     * @param width  the width of the output, see {@code Formattable}
+     * @param precision  the precision of the output, see {@code Formattable}
+     * @param padChar  the pad character to use
+     * @return the {@code formatter} instance, not null
+     */
+    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
+            final int precision, final char padChar) {
+        return append(seq, formatter, flags, width, precision, padChar, null);
+    }
+
+    /**
+     * Handles the common {@link Formattable} operations of truncate-pad-append,
+     * padding width underflow with spaces.
+     *
+     * @param seq  the string to handle, not null
+     * @param formatter  the destination formatter, not null
+     * @param flags  the flags for formatting, see {@code Formattable}
+     * @param width  the width of the output, see {@code Formattable}
+     * @param precision  the precision of the output, see {@code Formattable}
+     * @param ellipsis  the ellipsis to use when precision dictates truncation, null or
+     *  empty causes a hard truncation
+     * @return the {@code formatter} instance, not null
+     * @throws IllegalArgumentException if {@code ellipsis} is longer than a
+     *  non-negative {@code precision}
+     */
+    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
+            final int precision, final CharSequence ellipsis) {
+        return append(seq, formatter, flags, width, precision, ' ', ellipsis);
+    }
+
+    /**
+     * Handles the common {@link Formattable} operations of truncate-pad-append.
+     *
+     * <p>The sequence is first cut down to {@code precision} characters (the
+     * tail being replaced by {@code ellipsis} when one is given), then padded
+     * with {@code padChar} up to {@code width}, on the right when the
+     * {@code LEFT_JUSTIFY} flag is set and on the left otherwise, and finally
+     * written verbatim to {@code formatter}.</p>
+     *
+     * @param seq  the string to handle, not null
+     * @param formatter  the destination formatter, not null
+     * @param flags  the flags for formatting, see {@code Formattable}
+     * @param width  the width of the output, see {@code Formattable}
+     * @param precision  the precision of the output, see {@code Formattable}
+     * @param padChar  the pad character to use
+     * @param ellipsis  the ellipsis to use when precision dictates truncation, null or
+     *  empty causes a hard truncation
+     * @return the {@code formatter} instance, not null
+     * @throws IllegalArgumentException if {@code ellipsis} is longer than a
+     *  non-negative {@code precision}
+     */
+    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
+            final int precision, final char padChar, final CharSequence ellipsis) {
+        // An ellipsis only makes sense if it fits inside the truncated output.
+        if (ellipsis != null && precision >= 0 && ellipsis.length() > precision) {
+            throw new IllegalArgumentException(
+                    String.format("Specified ellipsis '%1$s' exceeds precision of %2$s",
+                            ellipsis,
+                            Integer.valueOf(precision)));
+        }
+        final StringBuilder buf = new StringBuilder(seq);
+        if (precision >= 0 && precision < seq.length()) {
+            final CharSequence tail = ellipsis == null ? "" : ellipsis;
+            // Keep (precision - tail length) leading characters and replace the rest by the tail.
+            buf.replace(precision - tail.length(), seq.length(), tail.toString());
+        }
+        // The guard also rules out negative widths, so the subtraction below cannot overflow.
+        if (width > buf.length()) {
+            final int padCount = width - buf.length();
+            final StringBuilder padding = new StringBuilder(padCount);
+            for (int i = 0; i < padCount; i++) {
+                padding.append(padChar);
+            }
+            final boolean leftJustify = (flags & LEFT_JUSTIFY) == LEFT_JUSTIFY;
+            if (leftJustify) {
+                buf.append(padding);
+            } else {
+                buf.insert(0, padding);
+            }
+        }
+        // Pass the text as an argument, never as the format string itself:
+        // otherwise a '%' in the content would be parsed as a conversion.
+        formatter.format(SIMPLEST_FORMAT, buf.toString());
+        return formatter;
+    }
+
+}