You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by sc...@apache.org on 2005/08/20 15:58:46 UTC
svn commit: r234028 - in /jakarta/commons/proper/lang/trunk/src: java/org/apache/commons/lang/text/StrTokenizer.java test/org/apache/commons/lang/text/StrTokenizerTest.java

Author: scolebourne
Date: Sat Aug 20 06:58:41 2005
New Revision: 234028

URL: http://svn.apache.org/viewcvs?rev=234028&view=rev
Log:
Don't clone char array input, and handle nulls better

Modified:
    jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
    jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java

Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java?rev=234028&r1=234027&r2=234028&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/StrTokenizer.java Sat Aug 20 06:58:41 2005
@@ -19,6 +19,8 @@
 import java.util.List;
 import java.util.ListIterator;
 
+import org.apache.commons.lang.ArrayUtils;
+
 /**
  * Tokenizes a string based on delimiters (separators)
  * and supporting quoting and ignored character concepts.
@@ -242,8 +244,12 @@
      */
     public StrTokenizer(String input) {
         super();
-        this.text = input;
-        this.chars = input.toCharArray();  // no clone as toCharArray() clones
+        text = input;
+        if (input != null) {
+            chars = input.toCharArray();
+        } else {
+            chars = null;
+        }
     }
 
     /**
@@ -308,19 +314,25 @@
     /**
      * Constructs a tokenizer splitting on space, tab, newline and formfeed
      * as per StringTokenizer.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      */
     public StrTokenizer(char[] input) {
         super();
         this.text = null;
-        this.chars = (char[]) input.clone();
+        this.chars = input;
     }
 
     /**
      * Constructs a tokenizer splitting on the specified character.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      * @param delim the field delimiter character
      */
     public StrTokenizer(char[] input, char delim) {
@@ -330,8 +342,11 @@
 
     /**
      * Constructs a tokenizer splitting on the specified string.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      * @param delim the field delimiter string
      */
     public StrTokenizer(char[] input, String delim) {
@@ -341,8 +356,11 @@
 
     /**
      * Constructs a tokenizer splitting using the specified delimiter matcher.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      * @param delim  the field delimiter matcher
      */
     public StrTokenizer(char[] input, StrMatcher delim) {
@@ -353,8 +371,11 @@
     /**
      * Constructs a tokenizer splitting on the specified delimiter character
      * and handling quotes using the specified quote character.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      * @param delim  the field delimiter character
      * @param quote  the field quoted string character
      */
@@ -366,8 +387,11 @@
     /**
      * Constructs a tokenizer splitting using the specified delimiter matcher
      * and handling quotes using the specified quote matcher.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the string which is to be parsed, cloned
+     * @param input  the string which is to be parsed, not cloned
      * @param delim  the field delimiter matcher
      * @param quote  the field quote matcher
      */
@@ -437,25 +461,32 @@
      * In this manner you can re-use a tokenizer with the same settings
      * on multiple input lines.
      *
-     * @param input  the new string to tokenize
+     * @param input  the new string to tokenize, null sets no text to parse
      */
     public void reset(String input) {
         reset();
-        this.text = input;
-        chars = input.toCharArray();  // no clone as toCharArray() clones
+        text = input;
+        if (input != null) {
+            chars = input.toCharArray();
+        } else {
+            chars = null;
+        }
     }
 
     /**
      * Reset this tokenizer, giving it a new input string to parse.
      * In this manner you can re-use a tokenizer with the same settings
      * on multiple input lines.
+     * <p>
+     * The input character array is not cloned, and must not be altered after
+     * passing in to this method.
      *
-     * @param input  the new character array to tokenize, cloned
+     * @param input  the new character array to tokenize, not cloned, null sets no text to parse
      */
-    public void reset(char [] input) {
+    public void reset(char[] input) {
         reset();
-        this.text = null;
-        chars = (char[]) input.clone();
+        text = null;
+        chars = input;
     }
 
     // ListIterator
@@ -560,6 +591,9 @@
      * @return array containing the tokens.
      */
     private String[] readTokens() {
+        if (chars == null) {
+            return ArrayUtils.EMPTY_STRING_ARRAY;
+        }
         int len = chars.length;
         char cbuf[] = new char[len];
         StringBuffer token = new StringBuffer();
@@ -812,7 +846,7 @@
     }
 
     /**
-     * Sets the field delimiter character
+     * Sets the field delimiter character.
      *
      * @param delim  the delimiter character to use
      */
@@ -821,9 +855,9 @@
     }
 
     /**
-     * Sets the field delimiter character
+     * Sets the field delimiter string.
      *
-     * @param delim  the delimiter character to use
+     * @param delim  the delimiter string to use
      */
     public void setDelimiterString(String delim) {
         setDelimiterMatcher(StrMatcher.stringMatcher(delim));

Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java?rev=234028&r1=234027&r2=234028&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java (original)
+++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/StrTokenizerTest.java Sat Aug 20 06:58:41 2005
@@ -348,24 +348,138 @@
         assertEquals(input, tok.getContent());
     }
 
+    //-----------------------------------------------------------------------
+    public void testConstructor_String() {
+        StrTokenizer tok = new StrTokenizer("a b");
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer("");
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((String) null);
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testConstructor_String_char() {
+        StrTokenizer tok = new StrTokenizer("a b", ' ');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer("", ' ');
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((String) null, ' ');
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testConstructor_String_char_char() {
+        StrTokenizer tok = new StrTokenizer("a b", ' ', '"');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer("", ' ', '"');
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((String) null, ' ', '"');
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testConstructor_charArray() {
+        StrTokenizer tok = new StrTokenizer("a b".toCharArray());
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer(new char[0]);
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((char[]) null);
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testConstructor_charArray_char() {
+        StrTokenizer tok = new StrTokenizer("a b".toCharArray(), ' ');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer(new char[0], ' ');
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((char[]) null, ' ');
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testConstructor_charArray_char_char() {
+        StrTokenizer tok = new StrTokenizer("a b".toCharArray(), ' ', '"');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer(new char[0], ' ', '"');
+        assertEquals(false, tok.hasNext());
+        
+        tok = new StrTokenizer((char[]) null, ' ', '"');
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
     public void testReset() {
-        String input = "a b c";
-        StrTokenizer tok = new StrTokenizer(input);
+        StrTokenizer tok = new StrTokenizer("a b c");
         assertEquals("a", tok.next());
         assertEquals("b", tok.next());
         assertEquals("c", tok.next());
+        assertEquals(false, tok.hasNext());
+        
         tok.reset();
         assertEquals("a", tok.next());
         assertEquals("b", tok.next());
         assertEquals("c", tok.next());
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testReset_String() {
+        StrTokenizer tok = new StrTokenizer("x x x");
         tok.reset("d e");
         assertEquals("d", tok.next());
         assertEquals("e", tok.next());
-        tok.reset("f g".toCharArray());
-        assertEquals("f", tok.next());
-        assertEquals("g", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok.reset((String) null);
+        assertEquals(false, tok.hasNext());
+    }
+
+    //-----------------------------------------------------------------------
+    public void testReset_charArray() {
+        StrTokenizer tok = new StrTokenizer("x x x");
+        
+        char[] array = new char[] {'a', ' ', 'c'};
+        tok.reset(array);
+        array[1] = 'b'; // test linked array
+        assertEquals("abc", tok.next());
+        assertEquals(false, tok.hasNext());
+        
+        tok.reset((char[]) null);
+        assertEquals(false, tok.hasNext());
     }
 
+    //-----------------------------------------------------------------------
     public void testTSV() {
         this.testXSVAbc(StrTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE));
         this.testXSVAbc(StrTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE.toCharArray()));



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org