You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2018/02/12 18:47:07 UTC
[text] [TEXT-116] Add a StrTokenizer replacement based on the
StringMatcher interface: StringTokenizer.
Repository: commons-text
Updated Branches:
refs/heads/master 995c44b71 -> 6d8b511f2
[TEXT-116] Add a StrTokenizer replacement based on the StringMatcher
interface: StringTokenizer.
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6d8b511f
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6d8b511f
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6d8b511f
Branch: refs/heads/master
Commit: 6d8b511f2081117a3c07a5e54392b1948df79248
Parents: 995c44b
Author: Gary Gregory <ga...@gmail.com>
Authored: Mon Feb 12 11:47:03 2018 -0700
Committer: Gary Gregory <ga...@gmail.com>
Committed: Mon Feb 12 11:47:03 2018 -0700
----------------------------------------------------------------------
src/changes/changes.xml | 1 +
.../org/apache/commons/text/StrTokenizer.java | 2 +
.../apache/commons/text/StringTokenizer.java | 1176 ++++++++++++++++++
.../apache/commons/text/TextStringBuilder.java | 14 +-
.../text/StrBuilderAppendInsertTest.java | 3 +
.../org/apache/commons/text/StrBuilderTest.java | 3 +
.../org/apache/commons/text/StrLookupTest.java | 3 +
.../org/apache/commons/text/StrMatcherTest.java | 3 +
.../apache/commons/text/StrSubstitutorTest.java | 3 +
.../apache/commons/text/StrTokenizerTest.java | 3 +
.../commons/text/StringTokenizerTest.java | 962 ++++++++++++++
.../commons/text/TextStringBuilderTest.java | 2 +-
12 files changed, 2167 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 59a17cd..584c343 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove.
<action issue="TEXT-113" type="add" dev="ggregory">Add an interpolator string lookup</action>
<action issue="TEXT-114" type="add" dev="ggregory">Add a StrSubstitutor replacement based on interfaces: StringSubstitutor</action>
<action issue="TEXT-115" type="add" dev="ggregory">Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder</action>
+ <action issue="TEXT-116" type="add" dev="ggregory">Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer</action>
</release>
<release version="1.2" date="2017-12-12" description="Release 1.2">
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StrTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java b/src/main/java/org/apache/commons/text/StrTokenizer.java
index c07ce31..3ae662d 100644
--- a/src/main/java/org/apache/commons/text/StrTokenizer.java
+++ b/src/main/java/org/apache/commons/text/StrTokenizer.java
@@ -80,7 +80,9 @@ import java.util.NoSuchElementException;
* </table>
*
* @since 1.0
+ * @deprecated Use {@link StringTokenizer}. This class will be removed in 2.0.
*/
+@Deprecated
public class StrTokenizer implements ListIterator<String>, Cloneable {
/** Comma separated values tokenizer internal variable. */
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StringTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java b/src/main/java/org/apache/commons/text/StringTokenizer.java
new file mode 100644
index 0000000..aaea4fb
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StringTokenizer.java
@@ -0,0 +1,1176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+import org.apache.commons.text.matcher.StringMatcher;
+import org.apache.commons.text.matcher.StringMatcherFactory;
+
+/**
+ * Tokenizes a string based on delimiters (separators) and supporting quoting and ignored character concepts.
+ * <p>
+ * This class can split a String into many smaller strings. It aims to do a similar job to
+ * {@link java.util.StringTokenizer StringTokenizer}, however it offers much more control and flexibility including
+ * implementing the <code>ListIterator</code> interface. By default, it is set up like <code>StringTokenizer</code>.
+ * <p>
+ * The input String is split into a number of <i>tokens</i>. Each token is separated from the next String by a
+ * <i>delimiter</i>. One or more delimiter characters must be specified.
+ * <p>
+ * Each token may be surrounded by quotes. The <i>quote</i> matcher specifies the quote character(s). A quote may be
+ * escaped within a quoted section by duplicating itself.
+ * <p>
+ * Between each token and the delimiter are potentially characters that need trimming. The <i>trimmer</i> matcher
+ * specifies these characters. One usage might be to trim whitespace characters.
+ * <p>
+ * At any point outside the quotes there might potentially be invalid characters. The <i>ignored</i> matcher specifies
+ * these characters to be removed. One usage might be to remove new line characters.
+ * <p>
+ * Empty tokens may be removed or returned as null.
+ *
+ * <pre>
+ * "a,b,c" - Three tokens "a","b","c" (comma delimiter)
+ * " a, b , c " - Three tokens "a","b","c" (default CSV processing trims whitespace)
+ * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
+ * </pre>
+ * <p>
+ *
+ * This tokenizer has the following properties and options:
+ *
+ * <table summary="Tokenizer Properties">
+ * <tr>
+ * <th>Property</th>
+ * <th>Type</th>
+ * <th>Default</th>
+ * </tr>
+ * <tr>
+ * <td>delim</td>
+ * <td>CharSetMatcher</td>
+ * <td>{ \t\n\r\f}</td>
+ * </tr>
+ * <tr>
+ * <td>quote</td>
+ * <td>NoneMatcher</td>
+ * <td>{}</td>
+ * </tr>
+ * <tr>
+ * <td>ignore</td>
+ * <td>NoneMatcher</td>
+ * <td>{}</td>
+ * </tr>
+ * <tr>
+ * <td>emptyTokenAsNull</td>
+ * <td>boolean</td>
+ * <td>false</td>
+ * </tr>
+ * <tr>
+ * <td>ignoreEmptyTokens</td>
+ * <td>boolean</td>
+ * <td>true</td>
+ * </tr>
+ * </table>
+ *
+ * @since 1.3
+ */
+public class StringTokenizer implements ListIterator<String>, Cloneable {
+
+ /** Comma separated values tokenizer internal variable. */
+ private static final StringTokenizer CSV_TOKENIZER_PROTOTYPE;
+ /** Tab separated values tokenizer internal variable. */
+ private static final StringTokenizer TSV_TOKENIZER_PROTOTYPE;
+ static {
+ CSV_TOKENIZER_PROTOTYPE = new StringTokenizer();
+ CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.commaMatcher());
+ CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+ CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+ CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+
+ TSV_TOKENIZER_PROTOTYPE = new StringTokenizer();
+ TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.tabMatcher());
+ TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+ TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+ TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+ }
+
+ /** The text to work on. */
+ private char[] chars;
+ /** The parsed tokens. */
+ private String[] tokens;
+ /** The current iteration position. */
+ private int tokenPos;
+
+ /** The delimiter matcher. */
+ private StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.splitMatcher();
+ /** The quote matcher. */
+ private StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.noneMatcher();
+ /** The ignored matcher. */
+ private StringMatcher ignoredMatcher = StringMatcherFactory.INSTANCE.noneMatcher();
+ /** The trimmer matcher. */
+ private StringMatcher trimmerMatcher = StringMatcherFactory.INSTANCE.noneMatcher();
+
+ /** Whether to return empty tokens as null. */
+ private boolean emptyAsNull = false;
+ /** Whether to ignore empty tokens. */
+ private boolean ignoreEmptyTokens = true;
+
+ // -----------------------------------------------------------------------
+
+ /**
+ * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+ *
+ * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+ */
+ private static StringTokenizer getCSVClone() {
+ return (StringTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Comma Separated Value strings, with no input text set yet.
+ * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the
+ * <code>setTrimmerMatcher</code> method).
+ * <p>
+ * You must call a "reset" method to set the string which you want to parse.
+ *
+ * @return a new tokenizer instance which parses Comma Separated Value strings
+ */
+ public static StringTokenizer getCSVInstance() {
+ return getCSVClone();
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Comma Separated Value strings, initializing it with the given input.
+ * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the
+ * <code>setTrimmerMatcher</code> method).
+ *
+ * @param input
+ * the text to parse
+ * @return a new tokenizer instance which parses Comma Separated Value strings
+ */
+ public static StringTokenizer getCSVInstance(final String input) {
+ final StringTokenizer tok = getCSVClone();
+ tok.reset(input);
+ return tok;
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Comma Separated Value strings, initializing it with the given input.
+ * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the
+ * <code>setTrimmerMatcher</code> method).
+ *
+ * @param input
+ * the text to parse
+ * @return a new tokenizer instance which parses Comma Separated Value strings
+ */
+ public static StringTokenizer getCSVInstance(final char[] input) {
+ final StringTokenizer tok = getCSVClone();
+ tok.reset(input);
+ return tok;
+ }
+
+ /**
+ * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+ *
+ * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+ */
+ private static StringTokenizer getTSVClone() {
+ return (StringTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to
+ * trim whitespace from both ends (which can be overridden with the <code>setTrimmerMatcher</code> method).
+ * <p>
+ * You must call a "reset" method to set the string which you want to parse.
+ *
+ * @return a new tokenizer instance which parses Tab Separated Value strings.
+ */
+ public static StringTokenizer getTSVInstance() {
+ return getTSVClone();
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Tab Separated Value strings, initializing it with the given input.
+ * The default for TSV processing is to trim whitespace from both ends (which can be overridden with the <code>setTrimmerMatcher</code> method).
+ *
+ * @param input
+ * the string to parse
+ * @return a new tokenizer instance which parses Tab Separated Value strings.
+ */
+ public static StringTokenizer getTSVInstance(final String input) {
+ final StringTokenizer tok = getTSVClone();
+ tok.reset(input);
+ return tok;
+ }
+
+ /**
+ * Gets a new tokenizer instance which parses Tab Separated Value strings, initializing it with the given input.
+ * The default for TSV processing is to trim whitespace from both ends (which can be overridden with the <code>setTrimmerMatcher</code> method).
+ *
+ * @param input
+ * the string to parse
+ * @return a new tokenizer instance which parses Tab Separated Value strings.
+ */
+ public static StringTokenizer getTSVInstance(final char[] input) {
+ final StringTokenizer tok = getTSVClone();
+ tok.reset(input);
+ return tok;
+ }
+
+ // -----------------------------------------------------------------------
+ /**
+ * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to
+ * tokenize.
+ * <p>
+ * This constructor is normally used with {@link #reset(String)}.
+ */
+ public StringTokenizer() {
+ super();
+ this.chars = null;
+ }
+
+ /**
+ * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer.
+ *
+ * @param input
+ * the string which is to be parsed
+ */
+ public StringTokenizer(final String input) {
+ super();
+ if (input != null) {
+ chars = input.toCharArray();
+ } else {
+ chars = null;
+ }
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified delimiter character.
+ *
+ * @param input
+ * the string which is to be parsed
+ * @param delim
+ * the field delimiter character
+ */
+ public StringTokenizer(final String input, final char delim) {
+ this(input);
+ setDelimiterChar(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified delimiter string.
+ *
+ * @param input
+ * the string which is to be parsed
+ * @param delim
+ * the field delimiter string
+ */
+ public StringTokenizer(final String input, final String delim) {
+ this(input);
+ setDelimiterString(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting using the specified delimiter matcher.
+ *
+ * @param input
+ * the string which is to be parsed
+ * @param delim
+ * the field delimiter matcher
+ */
+ public StringTokenizer(final String input, final StringMatcher delim) {
+ this(input);
+ setDelimiterMatcher(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified
+ * quote character.
+ *
+ * @param input
+ * the string which is to be parsed
+ * @param delim
+ * the field delimiter character
+ * @param quote
+ * the field quoted string character
+ */
+ public StringTokenizer(final String input, final char delim, final char quote) {
+ this(input, delim);
+ setQuoteChar(quote);
+ }
+
+ /**
+ * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified
+ * quote matcher.
+ *
+ * @param input
+ * the string which is to be parsed
+ * @param delim
+ * the field delimiter matcher
+ * @param quote
+ * the field quoted string matcher
+ */
+ public StringTokenizer(final String input, final StringMatcher delim, final StringMatcher quote) {
+ this(input, delim);
+ setQuoteMatcher(quote);
+ }
+
+ /**
+ * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ */
+ public StringTokenizer(final char[] input) {
+ super();
+ if (input == null) {
+ this.chars = null;
+ } else {
+ this.chars = input.clone();
+ }
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified character.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ * @param delim
+ * the field delimiter character
+ */
+ public StringTokenizer(final char[] input, final char delim) {
+ this(input);
+ setDelimiterChar(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified string.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ * @param delim
+ * the field delimiter string
+ */
+ public StringTokenizer(final char[] input, final String delim) {
+ this(input);
+ setDelimiterString(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting using the specified delimiter matcher.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ * @param delim
+ * the field delimiter matcher
+ */
+ public StringTokenizer(final char[] input, final StringMatcher delim) {
+ this(input);
+ setDelimiterMatcher(delim);
+ }
+
+ /**
+ * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified
+ * quote character.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ * @param delim
+ * the field delimiter character
+ * @param quote
+ * the field quoted string character
+ */
+ public StringTokenizer(final char[] input, final char delim, final char quote) {
+ this(input, delim);
+ setQuoteChar(quote);
+ }
+
+ /**
+ * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified
+ * quote matcher.
+ *
+ * @param input
+ * the character array which is to be parsed; a defensive copy is taken
+ * @param delim
+ * the field delimiter matcher
+ * @param quote
+ * the field quoted string matcher
+ */
+ public StringTokenizer(final char[] input, final StringMatcher delim, final StringMatcher quote) {
+ this(input, delim);
+ setQuoteMatcher(quote);
+ }
+
+ // API
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the number of tokens found in the String.
+ *
+ * @return the number of matched tokens
+ */
+ public int size() {
+ checkTokenized();
+ return tokens.length;
+ }
+
+ /**
+ * Gets the next token from the String. Equivalent to {@link #next()} except it returns null rather than throwing
+ * {@link NoSuchElementException} when no tokens remain.
+ *
+ * @return the next sequential token, or null when no more tokens are found
+ */
+ public String nextToken() {
+ if (hasNext()) {
+ return tokens[tokenPos++];
+ }
+ return null;
+ }
+
+ /**
+ * Gets the previous token from the String.
+ *
+ * @return the previous sequential token, or null when no more tokens are found
+ */
+ public String previousToken() {
+ if (hasPrevious()) {
+ return tokens[--tokenPos];
+ }
+ return null;
+ }
+
+ /**
+ * Gets a copy of the full token list as an independent modifiable array.
+ *
+ * @return the tokens as a String array
+ */
+ public String[] getTokenArray() {
+ checkTokenized();
+ return tokens.clone();
+ }
+
+ /**
+ * Gets a copy of the full token list as an independent modifiable list.
+ *
+ * @return the tokens as a String list
+ */
+ public List<String> getTokenList() {
+ checkTokenized();
+ final List<String> list = new ArrayList<>(tokens.length);
+ Collections.addAll(list, tokens);
+
+ return list;
+ }
+
+ /**
+ * Resets this tokenizer, forgetting all parsing and iteration already completed.
+ * <p>
+ * This method allows the same tokenizer to be reused for the same String.
+ *
+ * @return this, to enable chaining
+ */
+ public StringTokenizer reset() {
+ tokenPos = 0;
+ tokens = null;
+ return this;
+ }
+
+ /**
+ * Reset this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the
+ * same settings on multiple input lines.
+ *
+ * @param input
+ * the new string to tokenize, null sets no text to parse
+ * @return this, to enable chaining
+ */
+ public StringTokenizer reset(final String input) {
+ reset();
+ if (input != null) {
+ this.chars = input.toCharArray();
+ } else {
+ this.chars = null;
+ }
+ return this;
+ }
+
+ /**
+ * Reset this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the
+ * same settings on multiple input lines.
+ *
+ * @param input
+ * the new character array to tokenize (a copy is taken), null sets no text to parse
+ * @return this, to enable chaining
+ */
+ public StringTokenizer reset(final char[] input) {
+ reset();
+ if (input != null) {
+ this.chars = input.clone();
+ } else {
+ this.chars = null;
+ }
+ return this;
+ }
+
+ // ListIterator
+ // -----------------------------------------------------------------------
+ /**
+ * Checks whether there are any more tokens.
+ *
+ * @return true if there are more tokens
+ */
+ @Override
+ public boolean hasNext() {
+ checkTokenized();
+ return tokenPos < tokens.length;
+ }
+
+ /**
+ * Gets the next token.
+ *
+ * @return the next String token
+ * @throws NoSuchElementException
+ * if there are no more elements
+ */
+ @Override
+ public String next() {
+ if (hasNext()) {
+ return tokens[tokenPos++];
+ }
+ throw new NoSuchElementException();
+ }
+
+ /**
+ * Gets the index of the next token to return.
+ *
+ * @return the next token index
+ */
+ @Override
+ public int nextIndex() {
+ return tokenPos;
+ }
+
+ /**
+ * Checks whether there are any previous tokens that can be iterated to.
+ *
+ * @return true if there are previous tokens
+ */
+ @Override
+ public boolean hasPrevious() {
+ checkTokenized();
+ return tokenPos > 0;
+ }
+
+ /**
+ * Gets the token previous to the last returned token.
+ *
+ * @return the previous token
+ */
+ @Override
+ public String previous() {
+ if (hasPrevious()) {
+ return tokens[--tokenPos];
+ }
+ throw new NoSuchElementException();
+ }
+
+ /**
+ * Gets the index of the previous token.
+ *
+ * @return the previous token index
+ */
+ @Override
+ public int previousIndex() {
+ return tokenPos - 1;
+ }
+
+ /**
+ * Unsupported ListIterator operation.
+ *
+ * @throws UnsupportedOperationException
+ * always
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("remove() is unsupported");
+ }
+
+ /**
+ * Unsupported ListIterator operation.
+ *
+ * @param obj
+ * this parameter ignored.
+ * @throws UnsupportedOperationException
+ * always
+ */
+ @Override
+ public void set(final String obj) {
+ throw new UnsupportedOperationException("set() is unsupported");
+ }
+
+ /**
+ * Unsupported ListIterator operation.
+ *
+ * @param obj
+ * this parameter ignored.
+ * @throws UnsupportedOperationException
+ * always
+ */
+ @Override
+ public void add(final String obj) {
+ throw new UnsupportedOperationException("add() is unsupported");
+ }
+
+ // Implementation
+ // -----------------------------------------------------------------------
+ /**
+ * Checks if tokenization has been done, and if not then do it.
+ */
+ private void checkTokenized() {
+ if (tokens == null) {
+ if (chars == null) {
+ // still call tokenize as subclass may do some work
+ final List<String> split = tokenize(null, 0, 0);
+ tokens = split.toArray(new String[split.size()]);
+ } else {
+ final List<String> split = tokenize(chars, 0, chars.length);
+ tokens = split.toArray(new String[split.size()]);
+ }
+ }
+ }
+
+    /**
+     * Performs the tokenization; this is an internal extension hook.
+     * <p>
+     * Most users of this class do not need to call this method. This method will be called automatically by other
+     * (public) methods when required.
+     * <p>
+     * This method exists to allow subclasses to add code before or after the tokenization. For example, a subclass
+     * could alter the character array, offset or count to be parsed, or call the tokenizer multiple times on multiple
+     * strings. It is also possible to filter the results.
+     * <p>
+     * <code>StringTokenizer</code> will always pass a zero offset and a count equal to the length of the array to this
+     * method, however a subclass may pass other values, or even an entirely different array.
+     *
+     * @param srcChars
+     *            the character array being tokenized, may be null
+     * @param offset
+     *            the start position within the character array, must be valid
+     * @param count
+     *            the number of characters to tokenize starting at <code>offset</code>, must be valid
+     * @return a modifiable list of String tokens, or an unmodifiable empty list if the array is null or count is zero
+     */
+    protected List<String> tokenize(final char[] srcChars, final int offset, final int count) {
+        if (srcChars == null || count == 0) {
+            return Collections.emptyList();
+        }
+        final TextStringBuilder buf = new TextStringBuilder();
+        final List<String> tokenList = new ArrayList<>();
+        // exclusive end index of the region; using count alone would be wrong whenever offset > 0
+        final int end = offset + count;
+        int pos = offset;
+        // loop around the entire region
+        while (pos >= 0 && pos < end) {
+            // find next token
+            pos = readNextToken(srcChars, pos, end, buf, tokenList);
+            // handle case where end of string is a delimiter
+            if (pos >= end) {
+                addToken(tokenList, "");
+            }
+        }
+        return tokenList;
+    }
+
+ /**
+ * Adds a token to a list, paying attention to the parameters we've set.
+ *
+ * @param list
+ * the list to add to
+ * @param tok
+ * the token to add
+ */
+ private void addToken(final List<String> list, String tok) {
+ if (tok == null || tok.length() == 0) {
+ if (isIgnoreEmptyTokens()) {
+ return;
+ }
+ if (isEmptyTokenAsNull()) {
+ tok = null;
+ }
+ }
+ list.add(tok);
+ }
+
+ /**
+ * Reads character by character through the String to get the next token.
+ *
+ * @param srcChars
+ * the character array being tokenized
+ * @param start
+ * the first character of field
+ * @param len
+ * the length of the character array being tokenized
+ * @param workArea
+ * a temporary work area
+ * @param tokenList
+ * the list of parsed tokens
+ * @return the starting position of the next field (the character immediately after the delimiter), or -1 if end of
+ * string found
+ */
+ private int readNextToken(final char[] srcChars, int start, final int len, final TextStringBuilder workArea,
+ final List<String> tokenList) {
+ // skip all leading whitespace, unless it is the
+ // field delimiter or the quote character
+ while (start < len) {
+ final int removeLen = Math.max(getIgnoredMatcher().isMatch(srcChars, start, start, len),
+ getTrimmerMatcher().isMatch(srcChars, start, start, len));
+ if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0
+ || getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) {
+ break;
+ }
+ start += removeLen;
+ }
+
+ // handle reaching end
+ if (start >= len) {
+ addToken(tokenList, "");
+ return -1;
+ }
+
+ // handle empty token
+ final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len);
+ if (delimLen > 0) {
+ addToken(tokenList, "");
+ return start + delimLen;
+ }
+
+ // handle found token
+ final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len);
+ if (quoteLen > 0) {
+ return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen);
+ }
+ return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0);
+ }
+
+ /**
+ * Reads a possibly quoted string token.
+ *
+ * @param srcChars
+ * the character array being tokenized
+ * @param start
+ * the first character of field
+ * @param len
+ * the length of the character array being tokenized
+ * @param workArea
+ * a temporary work area
+ * @param tokenList
+ * the list of parsed tokens
+ * @param quoteStart
+ * the start position of the matched quote, 0 if no quoting
+ * @param quoteLen
+ * the length of the matched quote, 0 if no quoting
+ * @return the starting position of the next field (the character immediately after the delimiter), or -1 if end of
+ * string found
+ */
+ private int readWithQuotes(final char[] srcChars, final int start, final int len, final TextStringBuilder workArea,
+ final List<String> tokenList, final int quoteStart, final int quoteLen) {
+ // Loop until we've found the end of the quoted
+ // string or the end of the input
+ workArea.clear();
+ int pos = start;
+ boolean quoting = quoteLen > 0;
+ int trimStart = 0;
+
+ while (pos < len) {
+ // quoting mode can occur several times throughout a string
+ // we must switch between quoting and non-quoting until we
+ // encounter a non-quoted delimiter, or end of string
+ if (quoting) {
+ // In quoting mode
+
+ // If we've found a quote character, see if it's
+ // followed by a second quote. If so, then we need
+ // to actually put the quote character into the token
+ // rather than end the token.
+ if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+ if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) {
+ // matched pair of quotes, thus an escaped quote
+ workArea.append(srcChars, pos, quoteLen);
+ pos += quoteLen * 2;
+ trimStart = workArea.size();
+ continue;
+ }
+
+ // end of quoting
+ quoting = false;
+ pos += quoteLen;
+ continue;
+ }
+
+ // copy regular character from inside quotes
+ workArea.append(srcChars[pos++]);
+ trimStart = workArea.size();
+
+ } else {
+ // Not in quoting mode
+
+ // check for delimiter, and thus end of token
+ final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len);
+ if (delimLen > 0) {
+ // return condition when end of token found
+ addToken(tokenList, workArea.substring(0, trimStart));
+ return pos + delimLen;
+ }
+
+ // check for quote, and thus back into quoting mode
+ if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+ quoting = true;
+ pos += quoteLen;
+ continue;
+ }
+
+ // check for ignored (outside quotes), and ignore
+ final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len);
+ if (ignoredLen > 0) {
+ pos += ignoredLen;
+ continue;
+ }
+
+ // check for trimmed character
+ // don't yet know if its at the end, so copy to workArea
+ // use trimStart to keep track of trim at the end
+ final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len);
+ if (trimmedLen > 0) {
+ workArea.append(srcChars, pos, trimmedLen);
+ pos += trimmedLen;
+ continue;
+ }
+
+ // copy regular character from outside quotes
+ workArea.append(srcChars[pos++]);
+ trimStart = workArea.size();
+ }
+ }
+
+ // return condition when end of string found
+ addToken(tokenList, workArea.substring(0, trimStart));
+ return -1;
+ }
+
+ /**
+ * Checks if the characters at the index specified match the quote already matched in readNextToken().
+ *
+ * @param srcChars
+ * the character array being tokenized
+ * @param pos
+ * the position to check for a quote
+ * @param len
+ * the length of the character array being tokenized
+ * @param quoteStart
+ * the start position of the matched quote, 0 if no quoting
+ * @param quoteLen
+ * the length of the matched quote, 0 if no quoting
+ * @return true if a quote is matched
+ */
+ private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart,
+ final int quoteLen) {
+ for (int i = 0; i < quoteLen; i++) {
+ if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Delimiter
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the field delimiter matcher.
+ *
+ * @return the delimiter matcher in use
+ */
+ public StringMatcher getDelimiterMatcher() {
+ return this.delimMatcher;
+ }
+
+ /**
+ * Sets the field delimiter matcher.
+ * <p>
+ * The delimiter is used to separate one token from another.
+ *
+ * @param delim
+ * the delimiter matcher to use
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setDelimiterMatcher(final StringMatcher delim) {
+ if (delim == null) {
+ this.delimMatcher = StringMatcherFactory.INSTANCE.noneMatcher();
+ } else {
+ this.delimMatcher = delim;
+ }
+ return this;
+ }
+
+ /**
+ * Sets the field delimiter character.
+ *
+ * @param delim
+ * the delimiter character to use
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setDelimiterChar(final char delim) {
+ return setDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(delim));
+ }
+
+ /**
+ * Sets the field delimiter string.
+ *
+ * @param delim
+ * the delimiter string to use
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setDelimiterString(final String delim) {
+ return setDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(delim));
+ }
+
+ // Quote
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the quote matcher currently in use.
+ * <p>
+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. A
+ * tokenizer created without an explicit quote does not treat '"' (double quote) as a quote.
+ *
+ * @return the quote matcher in use
+ */
+ public StringMatcher getQuoteMatcher() {
+ return quoteMatcher;
+ }
+
+ /**
+ * Sets the quote matcher to use.
+ * <p>
+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data.
+ *
+ * @param quote
+ * the quote matcher to use, null ignored
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setQuoteMatcher(final StringMatcher quote) {
+ if (quote != null) {
+ this.quoteMatcher = quote;
+ }
+ return this;
+ }
+
+ /**
+ * Sets the quote character to use.
+ * <p>
+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data.
+ *
+ * @param quote
+ * the quote character to use
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setQuoteChar(final char quote) {
+ return setQuoteMatcher(StringMatcherFactory.INSTANCE.charMatcher(quote));
+ }
+
+ // Ignored
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the ignored character matcher.
+ * <p>
+ * These characters are ignored when parsing the String, unless they are within a quoted region. The default value
+ * is not to ignore anything.
+ *
+ * @return the ignored matcher in use
+ */
+ public StringMatcher getIgnoredMatcher() {
+ return ignoredMatcher;
+ }
+
+ /**
+ * Sets the matcher for characters to ignore.
+ * <p>
+ * These characters are ignored when parsing the String, unless they are within a quoted region.
+ *
+ * @param ignored
+ * the ignored matcher to use, null ignored
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setIgnoredMatcher(final StringMatcher ignored) {
+ if (ignored != null) {
+ this.ignoredMatcher = ignored;
+ }
+ return this;
+ }
+
+ /**
+ * Sets the character to ignore.
+ * <p>
+ * This character is ignored when parsing the String, unless it is within a quoted region.
+ *
+ * @param ignored
+ * the ignored character to use
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setIgnoredChar(final char ignored) {
+ return setIgnoredMatcher(StringMatcherFactory.INSTANCE.charMatcher(ignored));
+ }
+
+ // Trimmer
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the trimmer character matcher.
+ * <p>
+ * These characters are trimmed off on each side of the delimiter until the token or quote is found. The default
+ * value is not to trim anything.
+ *
+ * @return the trimmer matcher in use
+ */
+ public StringMatcher getTrimmerMatcher() {
+ return trimmerMatcher;
+ }
+
+ /**
+ * Sets the matcher for characters to trim.
+ * <p>
+ * These characters are trimmed off on each side of the delimiter until the token or quote is found.
+ *
+ * @param trimmer
+ * the trimmer matcher to use, null ignored
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setTrimmerMatcher(final StringMatcher trimmer) {
+ if (trimmer != null) {
+ this.trimmerMatcher = trimmer;
+ }
+ return this;
+ }
+
+ // -----------------------------------------------------------------------
+ /**
+ * Gets whether the tokenizer currently returns empty tokens as null. The default for this property is false.
+ *
+ * @return true if empty tokens are returned as null
+ */
+ public boolean isEmptyTokenAsNull() {
+ return this.emptyAsNull;
+ }
+
+ /**
+ * Sets whether the tokenizer should return empty tokens as null. The default for this property is false.
+ *
+ * @param emptyAsNull
+ * whether empty tokens are returned as null
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) {
+ this.emptyAsNull = emptyAsNull;
+ return this;
+ }
+
+ // -----------------------------------------------------------------------
+ /**
+ * Gets whether the tokenizer currently ignores empty tokens. The default for this property is true.
+ *
+ * @return true if empty tokens are not returned
+ */
+ public boolean isIgnoreEmptyTokens() {
+ return ignoreEmptyTokens;
+ }
+
+ /**
+ * Sets whether the tokenizer should ignore and not return empty tokens. The default for this property is true.
+ *
+ * @param ignoreEmptyTokens
+ * whether empty tokens are not returned
+ * @return this, to enable chaining
+ */
+ public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) {
+ this.ignoreEmptyTokens = ignoreEmptyTokens;
+ return this;
+ }
+
+ // -----------------------------------------------------------------------
+ /**
+ * Gets the String content that the tokenizer is parsing.
+ *
+ * @return the string content being parsed
+ */
+ public String getContent() {
+ if (chars == null) {
+ return null;
+ }
+ return new String(chars);
+ }
+
+ // -----------------------------------------------------------------------
+ /**
+ * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token
+ * list. If a {@link CloneNotSupportedException} is caught, returns <code>null</code>.
+ *
+ * @return a new instance of this Tokenizer which has been reset.
+ */
+ @Override
+ public Object clone() {
+ try {
+ return cloneReset();
+ } catch (final CloneNotSupportedException ex) {
+ return null;
+ }
+ }
+
+ /**
+ * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token
+ * list.
+ *
+ * @return a new instance of this Tokenizer which has been reset.
+ * @throws CloneNotSupportedException
+ * if there is a problem cloning
+ */
+ Object cloneReset() throws CloneNotSupportedException {
+ // this method exists to enable 100% test coverage
+ final StringTokenizer cloned = (StringTokenizer) super.clone();
+ if (cloned.chars != null) {
+ cloned.chars = cloned.chars.clone();
+ }
+ cloned.reset();
+ return cloned;
+ }
+
+ // -----------------------------------------------------------------------
+ /**
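+ * Gets a String representation of this tokenizer: its token list, or a marker if it has not been tokenized yet.
+ *
+ * @return "StringTokenizer" followed by the token list, or "StringTokenizer[not tokenized yet]"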
+ */
+ @Override
+ public String toString() {
+ if (tokens == null) {
+ return "StringTokenizer[not tokenized yet]";
+ }
+ return "StringTokenizer" + getTokenList();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/TextStringBuilder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java b/src/main/java/org/apache/commons/text/TextStringBuilder.java
index 8ab9322..8943d03 100644
--- a/src/main/java/org/apache/commons/text/TextStringBuilder.java
+++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java
@@ -2779,7 +2779,7 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable
* <p>
* The returned tokenizer is linked to this builder. You may intermix calls to the builder and tokenizer within
* certain limits, however there is no synchronization. Once the tokenizer has been used once, it must be
- * {@link StrTokenizer#reset() reset} to pickup the latest changes in the builder. For example:
+ * {@link StringTokenizer#reset() reset} to pickup the latest changes in the builder. For example:
*
* <pre>
* StrBuilder b = new StrBuilder();
@@ -2795,13 +2795,13 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable
* In addition to simply intermixing appends and tokenization, you can also call the set methods on the tokenizer to
* alter how it tokenizes. Just remember to call reset when you want to pickup builder changes.
* <p>
- * Calling {@link StrTokenizer#reset(String)} or {@link StrTokenizer#reset(char[])} with a non-null value will break
- * the link with the builder.
+ * Calling {@link StringTokenizer#reset(String)} or {@link StringTokenizer#reset(char[])} with a non-null value will
+ * break the link with the builder.
*
* @return a tokenizer that is linked to this builder
*/
- public StrTokenizer asTokenizer() {
- return new StrBuilderTokenizer();
+ public StringTokenizer asTokenizer() {
+ return new TextStringBuilderTokenizer();
}
// -----------------------------------------------------------------------
@@ -3038,12 +3038,12 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable
/**
* Inner class to allow StrBuilder to operate as a tokenizer.
*/
- class StrBuilderTokenizer extends StrTokenizer {
+ class TextStringBuilderTokenizer extends StringTokenizer {
/**
* Default constructor.
*/
- StrBuilderTokenizer() {
+ TextStringBuilderTokenizer() {
super();
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
index 3078808..db93410 100644
--- a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
+++ b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
@@ -30,7 +30,10 @@ import org.junit.Test;
/**
* Unit tests for {@link StrBuilder}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrBuilderAppendInsertTest {
/** The system line separator. */
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrBuilderTest.java b/src/test/java/org/apache/commons/text/StrBuilderTest.java
index 1cc5752..528588a 100644
--- a/src/test/java/org/apache/commons/text/StrBuilderTest.java
+++ b/src/test/java/org/apache/commons/text/StrBuilderTest.java
@@ -40,7 +40,10 @@ import org.junit.Test;
/**
* Unit tests for {@link StrBuilder}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrBuilderTest {
// -----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrLookupTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrLookupTest.java b/src/test/java/org/apache/commons/text/StrLookupTest.java
index fce36d2..62330e7 100644
--- a/src/test/java/org/apache/commons/text/StrLookupTest.java
+++ b/src/test/java/org/apache/commons/text/StrLookupTest.java
@@ -29,7 +29,10 @@ import org.junit.Test;
/**
* Test class for {@link StrLookup}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrLookupTest {
//-----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrMatcherTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrMatcherTest.java b/src/test/java/org/apache/commons/text/StrMatcherTest.java
index cac9670..22278f8 100644
--- a/src/test/java/org/apache/commons/text/StrMatcherTest.java
+++ b/src/test/java/org/apache/commons/text/StrMatcherTest.java
@@ -22,7 +22,10 @@ import org.junit.Test;
/**
* Unit tests for {@link StrMatcher}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrMatcherTest {
private static final char[] BUFFER1 = "0,1\t2 3\n\r\f\u0000'\"".toCharArray();
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
index cbd95c3..04f95bb 100644
--- a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
+++ b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
@@ -35,7 +35,10 @@ import org.junit.Test;
/**
* Test class for {@link StrSubstitutor}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrSubstitutorTest {
private Map<String, String> values;
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrTokenizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrTokenizerTest.java b/src/test/java/org/apache/commons/text/StrTokenizerTest.java
index 63a6ec0..35b9cd8 100644
--- a/src/test/java/org/apache/commons/text/StrTokenizerTest.java
+++ b/src/test/java/org/apache/commons/text/StrTokenizerTest.java
@@ -32,7 +32,10 @@ import org.junit.Test;
/**
* Unit test for {@link StrTokenizer}.
+ *
+ * @deprecated This class will be removed in 2.0.
*/
+@Deprecated
public class StrTokenizerTest {
private static final String CSV_SIMPLE_FIXTURE = "A,b,c";
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StringTokenizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StringTokenizerTest.java b/src/test/java/org/apache/commons/text/StringTokenizerTest.java
new file mode 100644
index 0000000..79a61db
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/StringTokenizerTest.java
@@ -0,0 +1,962 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.apache.commons.text.matcher.StringMatcher;
+import org.apache.commons.text.matcher.StringMatcherFactory;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link StringTokenizer}.
+ */
+public class StringTokenizerTest {
+
+ private static final String CSV_SIMPLE_FIXTURE = "A,b,c";
+
+ private static final String TSV_SIMPLE_FIXTURE = "A\tb\tc";
+
+ private void checkClone(final StringTokenizer tokenizer) {
+ assertFalse(StringTokenizer.getCSVInstance() == tokenizer);
+ assertFalse(StringTokenizer.getTSVInstance() == tokenizer);
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void test1() {
+
+ final String input = "a;b;c;\"d;\"\"e\";f; ; ; ";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", "c", "d;\"e", "f", "", "", "" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test2() {
+
+ final String input = "a;b;c ;\"d;\"\"e\";f; ; ;";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", "c ", "d;\"e", "f", " ", " ", "" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test3() {
+
+ final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", " c", "d;\"e", "f", " ", " ", "" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test4() {
+
+ final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(true);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", "c", "d;\"e", "f" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test5() {
+
+ final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", "c", "d;\"e", "f", null, null, null };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test6() {
+
+ final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterChar(';');
+ tok.setQuoteChar('"');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ // tok.setTreatingEmptyAsNull(true);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", " c", "d;\"e", "f", null, null, null };
+
+ int nextCount = 0;
+ while (tok.hasNext()) {
+ tok.next();
+ nextCount++;
+ }
+
+ int prevCount = 0;
+ while (tok.hasPrevious()) {
+ tok.previous();
+ prevCount++;
+ }
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+
+ assertTrue("could not cycle through entire token list" + " using the 'hasNext' and 'next' methods",
+ nextCount == expected.length);
+
+ assertTrue("could not cycle through entire token list" + " using the 'hasPrevious' and 'previous' methods",
+ prevCount == expected.length);
+
+ }
+
+ @Test
+ public void test7() {
+
+ final String input = "a b c \"d e\" f ";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher());
+ tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "", "", "b", "c", "d e", "f", "" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void test8() {
+
+ final String input = "a b c \"d e\" f ";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher());
+ tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+ tok.setIgnoreEmptyTokens(true);
+ final String[] tokens = tok.getTokenArray();
+
+ final String[] expected = { "a", "b", "c", "d e", "f" };
+
+ assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'",
+ expected[i], tokens[i]);
+ }
+
+ }
+
+ @Test
+ public void testBasic1() {
+ final String input = "a b c";
+ final StringTokenizer tok = new StringTokenizer(input);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasic2() {
+ final String input = "a \nb\fc";
+ final StringTokenizer tok = new StringTokenizer(input);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasic3() {
+ final String input = "a \nb\u0001\fc";
+ final StringTokenizer tok = new StringTokenizer(input);
+ assertEquals("a", tok.next());
+ assertEquals("b\u0001", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasic4() {
+ final String input = "a \"b\" c";
+ final StringTokenizer tok = new StringTokenizer(input);
+ assertEquals("a", tok.next());
+ assertEquals("\"b\"", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasic5() {
+ final String input = "a:b':c";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ assertEquals("a", tok.next());
+ assertEquals("b'", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicDelim1() {
+ final String input = "a:b:c";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicDelim2() {
+ final String input = "a:b:c";
+ final StringTokenizer tok = new StringTokenizer(input, ',');
+ assertEquals("a:b:c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testDelimString() {
+ final String input = "a##b##c";
+ final StringTokenizer tok = new StringTokenizer(input, "##");
+
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testDelimMatcher() {
+ final String input = "a/b\\c";
+ final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '/', '\\' });
+
+ final StringTokenizer tok = new StringTokenizer(input, delimMatcher);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testDelimMatcherQuoteMatcher() {
+ final String input = "`a`;`b`;`c`";
+ final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { ';' });
+ final StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '`' });
+
+ final StringTokenizer tok = new StringTokenizer(input, delimMatcher, quoteMatcher);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicEmpty1() {
+ final String input = "a b c";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setIgnoreEmptyTokens(false);
+ assertEquals("a", tok.next());
+ assertEquals("", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicEmpty2() {
+ final String input = "a b c";
+ final StringTokenizer tok = new StringTokenizer(input);
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertNull(tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted1() {
+ final String input = "a 'b' c";
+ final StringTokenizer tok = new StringTokenizer(input, ' ', '\'');
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertEquals("c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted2() {
+ final String input = "a:'b':";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted3() {
+ final String input = "a:'b''c'";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b'c", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted4() {
+ final String input = "a: 'b' 'c' :d";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b c", tok.next());
+ assertEquals("d", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted5() {
+ final String input = "a: 'b'x'c' :d";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("bxc", tok.next());
+ assertEquals("d", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted6() {
+ final String input = "a:'b'\"c':d";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher());
+ assertEquals("a", tok.next());
+ assertEquals("b\"c:d", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuoted7() {
+ final String input = "a:\"There's a reason here\":b";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher());
+ assertEquals("a", tok.next());
+ assertEquals("There's a reason here", tok.next());
+ assertEquals("b", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicQuotedTrimmed1() {
+ final String input = "a: 'b' :";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicTrimmed1() {
+ final String input = "a: b : ";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicTrimmed2() {
+ final String input = "a: b :";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.stringMatcher(" "));
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("b", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicIgnoreTrimmed1() {
+ final String input = "a: bIGNOREc : ";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("bc", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicIgnoreTrimmed2() {
+ final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE ";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("bc", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicIgnoreTrimmed3() {
+ final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE ";
+ final StringTokenizer tok = new StringTokenizer(input, ':');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals(" bc ", tok.next());
+ assertEquals(" ", tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ @Test
+ public void testBasicIgnoreTrimmed4() {
+ final String input = "IGNOREaIGNORE: IGNORE 'bIGNOREc'IGNORE'd' IGNORE : IGNORE ";
+ final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+ tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+ tok.setIgnoreEmptyTokens(false);
+ tok.setEmptyTokenAsNull(true);
+ assertEquals("a", tok.next());
+ assertEquals("bIGNOREcd", tok.next());
+ assertNull(tok.next());
+ assertFalse(tok.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testListArray() {
+ final String input = "a b c";
+ final StringTokenizer tok = new StringTokenizer(input);
+ final String[] array = tok.getTokenArray();
+ final List<?> list = tok.getTokenList();
+
+ assertEquals(Arrays.asList(array), list);
+ assertEquals(3, list.size());
+ }
+
+ // -----------------------------------------------------------------------
+ private void testCSV(final String data) {
+     // Run the same A/b/c checks through both CSV factory overloads: String input, then char[] input.
+     final StringTokenizer fromString = StringTokenizer.getCSVInstance(data);
+     testXSVAbc(fromString);
+     final StringTokenizer fromChars = StringTokenizer.getCSVInstance(data.toCharArray());
+     testXSVAbc(fromChars);
+ }
+
+ @Test
+ public void testCSVEmpty() {
+     // A CSV tokenizer created without input and one created from "" must both pass the empty checks.
+     final StringTokenizer withoutInput = StringTokenizer.getCSVInstance();
+     testEmpty(withoutInput);
+     final StringTokenizer withEmptyInput = StringTokenizer.getCSVInstance("");
+     testEmpty(withEmptyInput);
+ }
+
+ @Test
+ public void testCSVSimple() {
+     // Plain fixture, no extra padding.
+     testCSV(CSV_SIMPLE_FIXTURE);
+ }
+
+ @Test
+ public void testCSVSimpleNeedsTrim() {
+     // The fixture wrapped in leading and/or trailing whitespace must still pass the same checks.
+     final String[] paddedInputs = {
+         " " + CSV_SIMPLE_FIXTURE,
+         " \n\t " + CSV_SIMPLE_FIXTURE,
+         " \n " + CSV_SIMPLE_FIXTURE + "\n\n\r",
+     };
+     for (final String padded : paddedInputs) {
+         testCSV(padded);
+     }
+ }
+
+ /**
+  * Asserts that the given tokenizer exposes no tokens: nothing to iterate in either direction,
+  * a {@code null} next token, a size of zero, and a {@link NoSuchElementException} from {@code next()}.
+  *
+  * @param tokenizer the tokenizer expected to be empty
+  */
+ void testEmpty(final StringTokenizer tokenizer) {
+     this.checkClone(tokenizer);
+     assertFalse(tokenizer.hasNext());
+     assertFalse(tokenizer.hasPrevious());
+     assertNull(tokenizer.nextToken());
+     assertEquals(0, tokenizer.size());
+     try {
+         tokenizer.next();
+         fail("next() on an empty tokenizer should throw NoSuchElementException");
+     } catch (final NoSuchElementException expected) {
+         // Expected: there is no element to return.
+     }
+ }
+
+ @Test
+ public void testGetContent() {
+     final String content = "a b c \"d e\" f ";
+
+     // Content handed over as a String comes back unchanged.
+     final StringTokenizer fromString = new StringTokenizer(content);
+     assertEquals(content, fromString.getContent());
+
+     // Content handed over as a char[] comes back as the equal String.
+     final StringTokenizer fromChars = new StringTokenizer(content.toCharArray());
+     assertEquals(content, fromChars.getContent());
+
+     // A tokenizer built without input reports null content.
+     final StringTokenizer withoutInput = new StringTokenizer();
+     assertNull(withoutInput.getContent());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testChaining() {
+     // Each reset and setter call is expected to return a tokenizer equal to the one it was called on.
+     final StringTokenizer tokenizer = new StringTokenizer();
+     assertEquals(tokenizer, tokenizer.reset());
+     assertEquals(tokenizer, tokenizer.reset(""));
+     assertEquals(tokenizer, tokenizer.reset(new char[0]));
+     assertEquals(tokenizer, tokenizer.setDelimiterChar(' '));
+     assertEquals(tokenizer, tokenizer.setDelimiterString(" "));
+     assertEquals(tokenizer, tokenizer.setDelimiterMatcher(null));
+     assertEquals(tokenizer, tokenizer.setQuoteChar(' '));
+     assertEquals(tokenizer, tokenizer.setQuoteMatcher(null));
+     assertEquals(tokenizer, tokenizer.setIgnoredChar(' '));
+     assertEquals(tokenizer, tokenizer.setIgnoredMatcher(null));
+     assertEquals(tokenizer, tokenizer.setTrimmerMatcher(null));
+     assertEquals(tokenizer, tokenizer.setEmptyTokenAsNull(false));
+     assertEquals(tokenizer, tokenizer.setIgnoreEmptyTokens(false));
+ }
+
+ /**
+  * Tests that {@link StringTokenizer#clone()} returns {@code null} when the underlying
+  * {@code cloneReset()} call throws a {@link CloneNotSupportedException}.
+  */
+ @Test
+ public void testCloneNotSupportedException() {
+     final StringTokenizer uncloneable = new StringTokenizer() {
+
+         @Override
+         Object cloneReset() throws CloneNotSupportedException {
+             throw new CloneNotSupportedException("test");
+         }
+     };
+     assertNull(uncloneable.clone());
+ }
+
+ @Test
+ public void testCloneNull() {
+     final StringTokenizer original = new StringTokenizer((char[]) null);
+     // Sanity check: a null input yields no token, both before and after a reset.
+     assertNull(original.nextToken());
+     original.reset();
+     assertNull(original.nextToken());
+
+     // The clone, like the reset original, must also yield no token.
+     final StringTokenizer copy = (StringTokenizer) original.clone();
+     original.reset();
+     assertNull(original.nextToken());
+     assertNull(copy.nextToken());
+ }
+
+ @Test
+ public void testCloneReset() {
+     final char[] buffer = { 'a' };
+     final StringTokenizer original = new StringTokenizer(buffer);
+     // Sanity check: "a" is read initially and again after resetting with the same array.
+     assertEquals("a", original.nextToken());
+     original.reset(buffer);
+     assertEquals("a", original.nextToken());
+
+     final StringTokenizer copy = (StringTokenizer) original.clone();
+     // Mutating the array and resetting the original must not show through the clone.
+     buffer[0] = 'b';
+     original.reset(buffer);
+     assertEquals("b", original.nextToken());
+     assertEquals("a", copy.nextToken());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_String() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b");
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // An empty string and a null string both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer("");
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((String) null);
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_String_char() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b", ' ');
+     // The delimiter matcher must report a one-character match on a single space.
+     assertEquals(1, twoTokens.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // An empty string and a null string both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer("", ' ');
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((String) null, ' ');
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_String_char_char() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b", ' ', '"');
+     // Both matchers must report a one-character match on the character they were built from.
+     assertEquals(1, twoTokens.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+     assertEquals(1, twoTokens.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // An empty string and a null string both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer("", ' ', '"');
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((String) null, ' ', '"');
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_charArray() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b".toCharArray());
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // A zero-length array and a null array both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer(new char[0]);
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((char[]) null);
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_charArray_char() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b".toCharArray(), ' ');
+     // The delimiter matcher must report a one-character match on a single space.
+     assertEquals(1, twoTokens.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // A zero-length array and a null array both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer(new char[0], ' ');
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((char[]) null, ' ');
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testConstructor_charArray_char_char() {
+     final StringTokenizer twoTokens = new StringTokenizer("a b".toCharArray(), ' ', '"');
+     // Both matchers must report a one-character match on the character they were built from.
+     assertEquals(1, twoTokens.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+     assertEquals(1, twoTokens.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+     assertEquals("a", twoTokens.next());
+     assertEquals("b", twoTokens.next());
+     assertFalse(twoTokens.hasNext());
+
+     // A zero-length array and a null array both leave nothing to iterate.
+     final StringTokenizer fromEmpty = new StringTokenizer(new char[0], ' ', '"');
+     assertFalse(fromEmpty.hasNext());
+
+     final StringTokenizer fromNull = new StringTokenizer((char[]) null, ' ', '"');
+     assertFalse(fromNull.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testReset() {
+     final String[] expectedTokens = { "a", "b", "c" };
+     final StringTokenizer tokenizer = new StringTokenizer("a b c");
+
+     // First pass: walk through every token until the tokenizer is exhausted.
+     for (final String expected : expectedTokens) {
+         assertEquals(expected, tokenizer.next());
+     }
+     assertFalse(tokenizer.hasNext());
+
+     // After reset() the same tokens must be delivered again from the start.
+     tokenizer.reset();
+     for (final String expected : expectedTokens) {
+         assertEquals(expected, tokenizer.next());
+     }
+     assertFalse(tokenizer.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testReset_String() {
+     final StringTokenizer tokenizer = new StringTokenizer("x x x");
+
+     // The string passed to reset replaces the constructor input.
+     tokenizer.reset("d e");
+     assertEquals("d", tokenizer.next());
+     assertEquals("e", tokenizer.next());
+     assertFalse(tokenizer.hasNext());
+
+     // Resetting with a null string leaves nothing to iterate.
+     tokenizer.reset((String) null);
+     assertFalse(tokenizer.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testReset_charArray() {
+     final StringTokenizer tokenizer = new StringTokenizer("x x x");
+
+     // The array passed to reset replaces the constructor input and forms a single token.
+     tokenizer.reset(new char[] { 'a', 'b', 'c' });
+     assertEquals("abc", tokenizer.next());
+     assertFalse(tokenizer.hasNext());
+
+     // Resetting with a null array leaves nothing to iterate.
+     tokenizer.reset((char[]) null);
+     assertFalse(tokenizer.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testTSV() {
+     // Run the same A/b/c checks through both TSV factory overloads: String input, then char[] input.
+     final StringTokenizer fromString = StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE);
+     testXSVAbc(fromString);
+     final StringTokenizer fromChars = StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE.toCharArray());
+     testXSVAbc(fromChars);
+ }
+
+ @Test
+ public void testTSVEmpty() {
+     // A TSV tokenizer created without input and one created from "" must both pass the empty checks.
+     final StringTokenizer withoutInput = StringTokenizer.getTSVInstance();
+     testEmpty(withoutInput);
+     final StringTokenizer withEmptyInput = StringTokenizer.getTSVInstance("");
+     testEmpty(withEmptyInput);
+ }
+
+ /**
+  * Walks the given tokenizer forwards and then backwards, checking that it yields exactly the tokens
+  * "A", "b", "c" and that the reported indices follow the cursor at every step.
+  *
+  * @param tokenizer the tokenizer under test, positioned before its first token
+  */
+ void testXSVAbc(final StringTokenizer tokenizer) {
+     final String[] expected = { "A", "b", "c" };
+
+     checkClone(tokenizer);
+
+     // Before anything is read the cursor sits in front of the first token.
+     assertEquals(-1, tokenizer.previousIndex());
+     assertEquals(0, tokenizer.nextIndex());
+     assertNull(tokenizer.previousToken());
+
+     // Forward walk: each read advances nextIndex by one.
+     for (int i = 0; i < expected.length; i++) {
+         assertEquals(expected[i], tokenizer.nextToken());
+         assertEquals(i + 1, tokenizer.nextIndex());
+     }
+
+     // Reading past the end yields null and leaves the cursor where it was.
+     assertNull(tokenizer.nextToken());
+     assertEquals(3, tokenizer.nextIndex());
+
+     // Backward walk: each read moves nextIndex back by one.
+     for (int i = expected.length - 1; i >= 0; i--) {
+         assertEquals(expected[i], tokenizer.previousToken());
+         assertEquals(i, tokenizer.nextIndex());
+     }
+
+     // Reading before the start yields null and leaves the cursor at the beginning.
+     assertNull(tokenizer.previousToken());
+     assertEquals(0, tokenizer.nextIndex());
+     assertEquals(-1, tokenizer.previousIndex());
+     assertEquals(3, tokenizer.size());
+ }
+
+ @Test
+ public void testIteration() {
+     final StringTokenizer tkn = new StringTokenizer("a b c");
+     assertFalse(tkn.hasPrevious());
+     try {
+         tkn.previous();
+         fail("previous() before the first token should throw NoSuchElementException");
+     } catch (final NoSuchElementException expected) {
+         // Expected: nothing precedes the first token.
+     }
+     assertTrue(tkn.hasNext());
+
+     assertEquals("a", tkn.next());
+     // The three mutating iterator operations must all be rejected.
+     try {
+         tkn.remove();
+         fail("remove() should throw UnsupportedOperationException");
+     } catch (final UnsupportedOperationException expected) {
+         // Expected: removal is not supported.
+     }
+     try {
+         tkn.set("x");
+         fail("set() should throw UnsupportedOperationException");
+     } catch (final UnsupportedOperationException expected) {
+         // Expected: replacement is not supported.
+     }
+     try {
+         tkn.add("y");
+         fail("add() should throw UnsupportedOperationException");
+     } catch (final UnsupportedOperationException expected) {
+         // Expected: insertion is not supported.
+     }
+     assertTrue(tkn.hasPrevious());
+     assertTrue(tkn.hasNext());
+
+     assertEquals("b", tkn.next());
+     assertTrue(tkn.hasPrevious());
+     assertTrue(tkn.hasNext());
+
+     assertEquals("c", tkn.next());
+     assertTrue(tkn.hasPrevious());
+     assertFalse(tkn.hasNext());
+
+     try {
+         tkn.next();
+         fail("next() after the last token should throw NoSuchElementException");
+     } catch (final NoSuchElementException expected) {
+         // Expected: nothing follows the last token.
+     }
+     // The failed next() must not have moved the cursor.
+     assertTrue(tkn.hasPrevious());
+     assertFalse(tkn.hasNext());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testTokenizeSubclassInputChange() {
+     // The subclass discards the arguments it receives and tokenizes its own text instead.
+     final StringTokenizer tokenizer = new StringTokenizer("a b c d e") {
+
+         @Override
+         protected List<String> tokenize(final char[] chars, final int offset, final int count) {
+             final char[] replacement = "w x y z".toCharArray();
+             return super.tokenize(replacement, 2, 5);
+         }
+     };
+     assertEquals("x", tokenizer.next());
+     assertEquals("y", tokenizer.next());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testTokenizeSubclassOutputChange() {
+     // The subclass reverses the token list produced by the base implementation.
+     final StringTokenizer tokenizer = new StringTokenizer("a b c") {
+
+         @Override
+         protected List<String> tokenize(final char[] chars, final int offset, final int count) {
+             final List<String> reversed = super.tokenize(chars, offset, count);
+             Collections.reverse(reversed);
+             return reversed;
+         }
+     };
+     for (final String expected : new String[] { "c", "b", "a" }) {
+         assertEquals(expected, tokenizer.next());
+     }
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testToString() {
+     final StringTokenizer tokenizer = new StringTokenizer("a b c d e");
+     // Before the first read the string form shows a placeholder rather than tokens.
+     assertEquals("StringTokenizer[not tokenized yet]", tokenizer.toString());
+
+     // After one read the string form lists every token.
+     tokenizer.next();
+     assertEquals("StringTokenizer[a, b, c, d, e]", tokenizer.toString());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testStringTokenizerStringMatcher() {
+     // "bc" is passed as the delimiter string, so "abcd" is expected to split into "a" and "d".
+     final char[] input = new char[] { 'a', 'b', 'c', 'd' };
+     final StringTokenizer tokenizer = new StringTokenizer(input, "bc");
+     assertEquals("a", tokenizer.next());
+     assertEquals("d", tokenizer.next());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testStringTokenizerStrMatcher() {
+     // The comma matcher is passed as the delimiter, so "a,c" is expected to split into "a" and "c".
+     final char[] input = new char[] { 'a', ',', 'c' };
+     final StringTokenizer tokenizer =
+             new StringTokenizer(input, StringMatcherFactory.INSTANCE.commaMatcher());
+     assertEquals("a", tokenizer.next());
+     assertEquals("c", tokenizer.next());
+ }
+
+ // -----------------------------------------------------------------------
+ @Test
+ public void testStringTokenizerQuoteMatcher() {
+     // The quoted part ('ac') and the unquoted remainder (d) are expected to form one token.
+     final char[] input = new char[] { '\'', 'a', 'c', '\'', 'd' };
+     final StringTokenizer tokenizer = new StringTokenizer(
+             input,
+             StringMatcherFactory.INSTANCE.commaMatcher(),
+             StringMatcherFactory.INSTANCE.quoteMatcher());
+     assertEquals("acd", tokenizer.next());
+ }
+
+ @Test
+ public void testPreviousTokenAndSetEmptyTokenAsNull() {
+     final StringTokenizer tokenizer = StringTokenizer.getTSVInstance(" \t\n\r\f");
+     tokenizer.setEmptyTokenAsNull(true);
+
+     // Nothing has been read yet, so asking for the previous token must yield null.
+     assertNull(tokenizer.previousToken());
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
index 88d3a50..dec5d02 100644
--- a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
+++ b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
@@ -1691,7 +1691,7 @@ public class TextStringBuilderTest {
// from Javadoc
final TextStringBuilder b = new TextStringBuilder();
b.append("a b ");
- final StrTokenizer t = b.asTokenizer();
+ final StringTokenizer t = b.asTokenizer();
final String[] tokens1 = t.getTokenArray();
assertEquals(2, tokens1.length);