You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by sc...@apache.org on 2006/07/22 19:25:38 UTC
svn commit: r424608 - in /jakarta/commons/proper/lang/trunk/src:
java/org/apache/commons/lang/text/StrTokenizer.java
test/org/apache/commons/lang/text/StrTokenizerTest.java
Author: scolebourne
Date: Sat Jul 22 10:25:38 2006
New Revision: 424608
URL: http://svn.apache.org/viewvc?rev=424608&view=rev
Log:
Allow tokenizer state to be adjusted before and after tokenizing
Modified:
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java
Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java?rev=424608&r1=424607&r2=424608&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java Sat Jul 22 10:25:38 2006
@@ -16,12 +16,11 @@
package org.apache.commons.lang.text;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
-import org.apache.commons.lang.ArrayUtils;
-
/**
* Tokenizes a string based on delimiters (separators)
* and supporting quoting and ignored character concepts.
@@ -107,10 +106,8 @@
TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
}
- /** The text to work on */
+ /** The text to work on. */
private char chars[];
- /** The input text, null if char[] input */
- private String text;
/** The parsed tokens */
private String tokens[];
/** The current iteration position */
@@ -241,8 +238,7 @@
*/
public StrTokenizer() {
super();
- this.text = "";
- this.chars = new char[0];
+ this.chars = null;
}
/**
@@ -253,7 +249,6 @@
*/
public StrTokenizer(String input) {
super();
- text = input;
if (input != null) {
chars = input.toCharArray();
} else {
@@ -331,7 +326,6 @@
*/
public StrTokenizer(char[] input) {
super();
- this.text = null;
this.chars = input;
}
@@ -417,7 +411,7 @@
* @return the number of matched tokens
*/
public int size() {
- tokenize();
+ checkTokenized();
return tokens.length;
}
@@ -451,7 +445,7 @@
* @return the tokens as a String array
*/
public String[] getTokenArray() {
- tokenize();
+ checkTokenized();
return (String[]) tokens.clone();
}
@@ -461,7 +455,7 @@
* @return the tokens as a List of Strings
*/
public List getTokenList() {
- tokenize();
+ checkTokenized();
List list = new ArrayList(tokens.length);
for (int i = 0; i < tokens.length; i++) {
list.add(tokens[i]);
@@ -492,11 +486,10 @@
*/
public StrTokenizer reset(String input) {
reset();
- text = input;
if (input != null) {
- chars = input.toCharArray();
+ this.chars = input.toCharArray();
} else {
- chars = null;
+ this.chars = null;
}
return this;
}
@@ -514,8 +507,7 @@
*/
public StrTokenizer reset(char[] input) {
reset();
- text = null;
- chars = input;
+ this.chars = input;
return this;
}
@@ -527,7 +519,7 @@
* @return true if there are more tokens
*/
public boolean hasNext() {
- tokenize();
+ checkTokenized();
return tokenPos < tokens.length;
}
@@ -558,7 +550,7 @@
* @return true if there are previous tokens
*/
public boolean hasPrevious() {
- tokenize();
+ checkTokenized();
return tokenPos > 0;
}
@@ -613,42 +605,60 @@
// Implementation
//-----------------------------------------------------------------------
/**
- * Performs the tokenization if it hasn't already been done.
+ * Checks if tokenization has been done, and if not then do it.
*/
- private void tokenize() {
+ private void checkTokenized() {
if (tokens == null) {
- tokens = readTokens();
+ if (chars == null) {
+ // still call tokenize as subclass may do some work
+ List split = tokenize(null, 0, 0);
+ tokens = (String[]) split.toArray(new String[split.size()]);
+ } else {
+ List split = tokenize(chars, 0, chars.length);
+ tokens = (String[]) split.toArray(new String[split.size()]);
+ }
}
}
/**
- * Read all the tokens.
+ * Internal method to perform the tokenization.
+ * <p>
+ * Most users of this class do not need to call this method. This method
+ * will be called automatically by other (public) methods when required.
+ * <p>
+ * This method exists to allow subclasses to add code before or after the
+ * tokenization. For example, a subclass could alter the character array,
+ * offset or count to be parsed, or call the tokenizer multiple times on
+ * multiple strings. It is also possible to filter the results.
+ * <p>
+ * <code>StrTokenizer</code> will always pass a zero offset and a count
+ * equal to the length of the array to this method, however a subclass
+ * may pass other values, or even an entirely different array.
*
- * @return array containing the tokens.
- */
- private String[] readTokens() {
- if (chars == null) {
- return ArrayUtils.EMPTY_STRING_ARRAY;
- }
- int len = chars.length;
- if (len == 0) {
- return ArrayUtils.EMPTY_STRING_ARRAY;
+ * @param chars the character array being tokenized, may be null
+ * @param offset the start position within the character array, must be valid
+ * @param count the number of characters to tokenize, must be valid
+ * @return the modifiable list of String tokens, unmodifiable if null array or zero count
+ */
+ protected List tokenize(char[] chars, int offset, int count) {
+ if (chars == null || count == 0) {
+ return Collections.EMPTY_LIST;
}
StrBuilder buf = new StrBuilder();
List tokens = new ArrayList();
- int start = 0;
+ int pos = offset;
// loop around the entire buffer
- while (start >= 0 && start < len) {
+ while (pos >= 0 && pos < count) {
// find next token
- start = readNextToken(chars, start, len, buf, tokens);
+ pos = readNextToken(chars, pos, count, buf, tokens);
// handle case where end of string is a delimiter
- if (start >= len) {
+ if (pos >= count) {
addToken(tokens, "");
}
}
- return (String[]) tokens.toArray(new String[tokens.size()]);
+ return tokens;
}
/**
@@ -1058,10 +1068,7 @@
* @return the string content being parsed
*/
public String getContent() {
- if (text == null) {
- text = new String(chars);
- }
- return text;
+ return new String(chars);
}
//-----------------------------------------------------------------------
Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java?rev=424608&r1=424607&r2=424608&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java (original)
+++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java Sat Jul 22 10:25:38 2006
@@ -17,6 +17,7 @@
package org.apache.commons.lang.text;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
@@ -531,7 +532,7 @@
public void testGetContent() {
String input = "a b c \"d e\" f ";
StrTokenizer tok = new StrTokenizer(input);
- assertSame(input, tok.getContent());
+ assertEquals(input, tok.getContent());
tok = new StrTokenizer(input.toCharArray());
assertEquals(input, tok.getContent());
@@ -802,6 +803,31 @@
} catch (NoSuchElementException ex) {}
assertEquals(true, tkn.hasPrevious());
assertEquals(false, tkn.hasNext());
+ }
+
+ //-----------------------------------------------------------------------
+ public void testTokenizeSubclassInputChange() {
+ StrTokenizer tkn = new StrTokenizer("a b c d e") {
+ protected List tokenize(char[] chars, int offset, int count) {
+ return super.tokenize("w x y z".toCharArray(), 2, 5);
+ }
+ };
+ assertEquals("x", tkn.next());
+ assertEquals("y", tkn.next());
+ }
+
+ //-----------------------------------------------------------------------
+ public void testTokenizeSubclassOutputChange() {
+ StrTokenizer tkn = new StrTokenizer("a b c") {
+ protected List tokenize(char[] chars, int offset, int count) {
+ List list = super.tokenize(chars, offset, count);
+ Collections.reverse(list);
+ return list;
+ }
+ };
+ assertEquals("c", tkn.next());
+ assertEquals("b", tkn.next());
+ assertEquals("a", tkn.next());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org