You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by sc...@apache.org on 2006/07/22 19:25:38 UTC
svn commit: r424608 - in /jakarta/commons/proper/lang/trunk/src: java/org/apache/commons/lang/text/StrTokenizer.java test/org/apache/commons/lang/text/StrTokenizerTest.java

Author: scolebourne
Date: Sat Jul 22 10:25:38 2006
New Revision: 424608

URL: http://svn.apache.org/viewvc?rev=424608&view=rev
Log:
Allow tokenizer state to be adjusted before and after tokenizing

Modified:
    jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
    jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java

Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java?rev=424608&r1=424607&r2=424608&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java Sat Jul 22 10:25:38 2006
@@ -16,12 +16,11 @@
 package org.apache.commons.lang.text;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.NoSuchElementException;
 
-import org.apache.commons.lang.ArrayUtils;
-
 /**
  * Tokenizes a string based based on delimiters (separators)
  * and supporting quoting and ignored character concepts.
@@ -107,10 +106,8 @@
         TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
     }
 
-    /** The text to work on */
+    /** The text to work on. */
     private char chars[];
-    /** The input text, null if char[] input */
-    private String text;
     /** The parsed tokens */
     private String tokens[];
     /** The current iteration position */
@@ -241,8 +238,7 @@
      */
     public StrTokenizer() {
         super();
-        this.text = "";
-        this.chars = new char[0];
+        this.chars = null;
     }
 
     /**
@@ -253,7 +249,6 @@
      */
     public StrTokenizer(String input) {
         super();
-        text = input;
         if (input != null) {
             chars = input.toCharArray();
         } else {
@@ -331,7 +326,6 @@
      */
     public StrTokenizer(char[] input) {
         super();
-        this.text = null;
         this.chars = input;
     }
 
@@ -417,7 +411,7 @@
      * @return the number of matched tokens
      */
     public int size() {
-        tokenize();
+        checkTokenized();
         return tokens.length;
     }
 
@@ -451,7 +445,7 @@
      * @return the tokens as a String array
      */
     public String[] getTokenArray() {
-        tokenize();
+        checkTokenized();
         return (String[]) tokens.clone();
     }
 
@@ -461,7 +455,7 @@
      * @return the tokens as a String array
      */
     public List getTokenList() {
-        tokenize();
+        checkTokenized();
         List list = new ArrayList(tokens.length);
         for (int i = 0; i < tokens.length; i++) {
             list.add(tokens[i]);
@@ -492,11 +486,10 @@
      */
     public StrTokenizer reset(String input) {
         reset();
-        text = input;
         if (input != null) {
-            chars = input.toCharArray();
+            this.chars = input.toCharArray();
         } else {
-            chars = null;
+            this.chars = null;
         }
         return this;
     }
@@ -514,8 +507,7 @@
      */
     public StrTokenizer reset(char[] input) {
         reset();
-        text = null;
-        chars = input;
+        this.chars = input;
         return this;
     }
 
@@ -527,7 +519,7 @@
      * @return true if there are more tokens
      */
     public boolean hasNext() {
-        tokenize();
+        checkTokenized();
         return tokenPos < tokens.length;
     }
 
@@ -558,7 +550,7 @@
      * @return true if there are previous tokens
      */
     public boolean hasPrevious() {
-        tokenize();
+        checkTokenized();
         return tokenPos > 0;
     }
 
@@ -613,42 +605,60 @@
     // Implementation
     //-----------------------------------------------------------------------
     /**
-     * Performs the tokenization if it hasn't already been done.
+     * Checks if tokenization has been done, and if not then do it.
      */
-    private void tokenize() {
+    private void checkTokenized() {
         if (tokens == null) {
-            tokens = readTokens();
+            if (chars == null) {
+                // still call tokenize as subclass may do some work
+                List split = tokenize(null, 0, 0);
+                tokens = (String[]) split.toArray(new String[split.size()]);
+            } else {
+                List split = tokenize(chars, 0, chars.length);
+                tokens = (String[]) split.toArray(new String[split.size()]);
+            }
         }
     }
 
     /**
-     * Read all the tokens.
+     * Internal method that performs the tokenization.
+     * <p>
+     * Most users of this class do not need to call this method. This method
+     * will be called automatically by other (public) methods when required.
+     * <p>
+     * This method exists to allow subclasses to add code before or after the
+     * tokenization. For example, a subclass could alter the character array,
+     * offset or count to be parsed, or call the tokenizer multiple times on
+     * multiple strings. It is also possible to filter the results.
+     * <p>
+     * <code>StrTokenizer</code> will always pass a zero offset and a count
+     * equal to the length of the array to this method, however a subclass
+     * may pass other values, or even an entirely different array.
      * 
-     * @return array containing the tokens.
-     */
-    private String[] readTokens() {
-        if (chars == null) {
-            return ArrayUtils.EMPTY_STRING_ARRAY;
-        }
-        int len = chars.length;
-        if (len == 0) {
-            return ArrayUtils.EMPTY_STRING_ARRAY;
+     * @param chars  the character array being tokenized, may be null
+     * @param offset  the start position within the character array, must be valid
+     * @param count  the number of characters to tokenize, must be valid
+     * @return the modifiable list of String tokens, unmodifiable if null array or zero count
+     */
+    protected List tokenize(char[] chars, int offset, int count) {
+        if (chars == null || count == 0) {
+            return Collections.EMPTY_LIST;
         }
         StrBuilder buf = new StrBuilder();
         List tokens = new ArrayList();
-        int start = 0;
+        int pos = offset;
         
         // loop around the entire buffer
-        while (start >= 0 && start < len) {
+        while (pos >= 0 && pos < count) {
             // find next token
-            start = readNextToken(chars, start, len, buf, tokens);
+            pos = readNextToken(chars, pos, count, buf, tokens);
             
             // handle case where end of string is a delimiter
-            if (start >= len) {
+            if (pos >= count) {
                 addToken(tokens, "");
             }
         }
-        return (String[]) tokens.toArray(new String[tokens.size()]);
+        return tokens;
     }
 
     /**
@@ -1058,10 +1068,7 @@
      * @return the string content being parsed
      */
     public String getContent() {
-        if (text == null) {
-            text = new String(chars);
-        }
-        return text;
+        return new String(chars);
     }
 
     //-----------------------------------------------------------------------

Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java?rev=424608&r1=424607&r2=424608&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java (original)
+++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java Sat Jul 22 10:25:38 2006
@@ -17,6 +17,7 @@
 package org.apache.commons.lang.text;
 
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.NoSuchElementException;
 
@@ -531,7 +532,7 @@
     public void testGetContent() {
         String input = "a   b c \"d e\" f ";
         StrTokenizer tok = new StrTokenizer(input);
-        assertSame(input, tok.getContent());
+        assertEquals(input, tok.getContent());
 
         tok = new StrTokenizer(input.toCharArray());
         assertEquals(input, tok.getContent());
@@ -802,6 +803,31 @@
         } catch (NoSuchElementException ex) {}
         assertEquals(true, tkn.hasPrevious());
         assertEquals(false, tkn.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testTokenizeSubclassInputChange() {
+        StrTokenizer tkn = new StrTokenizer("a b c d e") {
+            protected List tokenize(char[] chars, int offset, int count) {
+                return super.tokenize("w x y z".toCharArray(), 2, 5);
+            }
+        };
+        assertEquals("x", tkn.next());
+        assertEquals("y", tkn.next());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testTokenizeSubclassOutputChange() {
+        StrTokenizer tkn = new StrTokenizer("a b c") {
+            protected List tokenize(char[] chars, int offset, int count) {
+                List list = super.tokenize(chars, offset, count);
+                Collections.reverse(list);
+                return list;
+            }
+        };
+        assertEquals("c", tkn.next());
+        assertEquals("b", tkn.next());
+        assertEquals("a", tkn.next());
     }
 
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org