You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/02 04:12:23 UTC
[06/12] lucenenet git commit: Lucene.Net.Support.StringTokenizer: Did
a fresh port from Apache Harmony and ported tests
Lucene.Net.Support.StringTokenizer: Did a fresh port from Apache Harmony and ported tests
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/eaf47793
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/eaf47793
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/eaf47793
Branch: refs/heads/master
Commit: eaf47793073e0465c590c708d8a80b9c33140f58
Parents: cd2d351
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Jul 31 12:28:03 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 2 09:53:19 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Tests/Lucene.Net.Tests.csproj | 1 +
.../Support/TestStringTokenizer.cs | 353 +++++++++++++++
src/Lucene.Net/Support/StringTokenizer.cs | 438 ++++++++++---------
3 files changed, 581 insertions(+), 211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eaf47793/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj b/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
index 13282e1..495bd1f 100644
--- a/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
+++ b/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
@@ -525,6 +525,7 @@
<Compile Include="Support\SmallObject.cs" />
<Compile Include="Support\TestDictionaryExtensions.cs" />
<Compile Include="Support\TestPriorityQueue.cs" />
+ <Compile Include="Support\TestStringTokenizer.cs" />
<Compile Include="Support\Threading\TestCloseableThreadLocal.cs" />
<Compile Include="Support\TestCollections.cs" />
<Compile Include="Support\TestEquatableList.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eaf47793/src/Lucene.Net.Tests/Support/TestStringTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Support/TestStringTokenizer.cs b/src/Lucene.Net.Tests/Support/TestStringTokenizer.cs
new file mode 100644
index 0000000..5de8588
--- /dev/null
+++ b/src/Lucene.Net.Tests/Support/TestStringTokenizer.cs
@@ -0,0 +1,353 @@
+// This class was sourced from the Apache Harmony project
+// https://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/
+
+using Lucene.Net.Attributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Support
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestStringTokenizer : LuceneTestCase
+ {
+ /**
+ * @tests java.util.StringTokenizer#StringTokenizer(java.lang.String)
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_ConstructorLjava_lang_String()
+ {
+ // Test for method java.util.StringTokenizer(java.lang.String)
+ assertTrue("Used in tests", true);
+ }
+
+ /**
+ * @tests java.util.StringTokenizer#StringTokenizer(java.lang.String,
+ * java.lang.String)
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_ConstructorLjava_lang_StringLjava_lang_String()
+ {
+ // Test for method java.util.StringTokenizer(java.lang.String,
+ // java.lang.String)
+ StringTokenizer st = new StringTokenizer("This:is:a:test:String", ":");
+ assertTrue("Created incorrect tokenizer", st.CountTokens() == 5
+ && (st.NextToken().equals("This")));
+ }
+
+ /**
+ * @tests java.util.StringTokenizer#StringTokenizer(java.lang.String,
+ * java.lang.String, boolean)
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_ConstructorLjava_lang_StringLjava_lang_StringZ()
+ {
+ // Test for method java.util.StringTokenizer(java.lang.String,
+ // java.lang.String, boolean)
+ StringTokenizer st = new StringTokenizer("This:is:a:test:String", ":",
+ true);
+ st.NextToken();
+ assertTrue("Created incorrect tokenizer", st.CountTokens() == 8
+ && (st.NextToken().equals(":")));
+ }
+
+ /**
+ * @tests java.util.StringTokenizer#countTokens()
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_countTokens()
+ {
+ // Test for method int java.util.StringTokenizer.countTokens()
+ StringTokenizer st = new StringTokenizer("This is a test String");
+
+ assertEquals("Incorrect token count returned", 5, st.CountTokens());
+ }
+
+ ///**
+ // * @tests java.util.StringTokenizer#hasMoreElements()
+ // */
+ //[Test, LuceneNetSpecific]
+ //public void test_hasMoreElements()
+ //{
+ // // Test for method boolean java.util.StringTokenizer.hasMoreElements()
+
+ // StringTokenizer st = new StringTokenizer("This is a test String");
+ // st.NextToken();
+ // assertTrue("hasMoreElements returned incorrect value", st
+ // .hasMoreElements());
+ // st.NextToken();
+ // st.NextToken();
+ // st.NextToken();
+ // st.NextToken();
+ // assertTrue("hasMoreElements returned incorrect value", !st
+ // .hasMoreElements());
+ //}
+
+ /**
+ * @tests java.util.StringTokenizer#hasMoreTokens()
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_hasMoreTokens()
+ {
+ // Test for method boolean java.util.StringTokenizer.hasMoreTokens()
+ StringTokenizer st = new StringTokenizer("This is a test String");
+ for (int counter = 0; counter < 5; counter++)
+ {
+ assertTrue(
+ "StringTokenizer incorrectly reports it has no more tokens",
+ st.HasMoreTokens());
+ st.NextToken();
+ }
+ assertTrue("StringTokenizer incorrectly reports it has more tokens",
+ !st.HasMoreTokens());
+ }
+
+ ///**
+ // * @tests java.util.StringTokenizer#nextElement()
+ // */
+ //[Test, LuceneNetSpecific]
+ //public void test_nextElement()
+ //{
+ // // Test for method java.lang.Object
+ // // java.util.StringTokenizer.nextElement()
+ // StringTokenizer st = new StringTokenizer("This is a test String");
+ // assertEquals("nextElement returned incorrect value", "This", ((String)st
+ // .NextToken()));
+ // assertEquals("nextElement returned incorrect value", "is", ((String)st
+ // .NextToken()));
+ // assertEquals("nextElement returned incorrect value", "a", ((String)st
+ // .NextToken()));
+ // assertEquals("nextElement returned incorrect value", "test", ((String)st
+ // .NextToken()));
+ // assertEquals("nextElement returned incorrect value", "String", ((String)st
+ // .NextToken()));
+ // try
+ // {
+ // st.NextToken();
+ // fail(
+ // "nextElement failed to throw a NoSuchElementException when it should have been out of elements");
+ // }
+ // catch (InvalidOperationException e)
+ // {
+ // return;
+ // }
+ //}
+
+ /**
+ * @tests java.util.StringTokenizer#nextToken()
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_nextToken()
+ {
+ // Test for method java.lang.String
+ // java.util.StringTokenizer.nextToken()
+ StringTokenizer st = new StringTokenizer("This is a test String");
+ assertEquals("nextToken returned incorrect value",
+ "This", st.NextToken());
+ assertEquals("nextToken returned incorrect value",
+ "is", st.NextToken());
+ assertEquals("nextToken returned incorrect value",
+ "a", st.NextToken());
+ assertEquals("nextToken returned incorrect value",
+ "test", st.NextToken());
+ assertEquals("nextToken returned incorrect value",
+ "String", st.NextToken());
+ try
+ {
+ st.NextToken();
+ fail(
+ "nextToken failed to throw a NoSuchElementException when it should have been out of elements");
+ }
+#pragma warning disable 168
+ catch (InvalidOperationException e)
+#pragma warning restore 168
+ {
+ return;
+ }
+ }
+
+ /**
+ * @tests java.util.StringTokenizer#nextToken(java.lang.String)
+ */
+ [Test, LuceneNetSpecific]
+ public void Test_nextTokenLjava_lang_String()
+ {
+ // Test for method java.lang.String
+ // java.util.StringTokenizer.nextToken(java.lang.String)
+ StringTokenizer st = new StringTokenizer("This is a test String");
+ assertEquals("nextToken(String) returned incorrect value with normal token String",
+ "This", st.NextToken(" "));
+ assertEquals("nextToken(String) returned incorrect value with custom token String",
+ " is a ", st.NextToken("tr"));
+ assertEquals("calling nextToken() did not use the new default delimiter list",
+ "es", st.NextToken());
+ }
+
+ //[Test, LuceneNetSpecific]
+ //public void test_hasMoreElements_NPE()
+ //{
+ // StringTokenizer stringTokenizer = new StringTokenizer(new String(),
+ // (String)null, true);
+ // try
+ // {
+ // stringTokenizer.HasMoreElements();
+ // fail("should throw NullPointerException");
+ // }
+ // catch (NullPointerException e)
+ // {
+ // // Expected
+ // }
+
+ // stringTokenizer = new StringTokenizer(new String(), (String)null);
+ // try
+ // {
+ // stringTokenizer.hasMoreElements();
+ // fail("should throw NullPointerException");
+ // }
+ // catch (NullPointerException e)
+ // {
+ // // Expected
+ // }
+ //}
+
+ [Test, LuceneNetSpecific]
+ public void Test_hasMoreTokens_NPE()
+ {
+ StringTokenizer stringTokenizer = new StringTokenizer("",
+ (String)null, true);
+ try
+ {
+ stringTokenizer.HasMoreTokens();
+ fail("should throw NullPointerException");
+ }
+#pragma warning disable 168
+ catch (ArgumentNullException e)
+#pragma warning restore 168
+ {
+ // Expected
+ }
+
+ stringTokenizer = new StringTokenizer("", (String)null);
+ try
+ {
+ stringTokenizer.HasMoreTokens();
+ fail("should throw NullPointerException");
+ }
+#pragma warning disable 168
+ catch (ArgumentNullException e)
+#pragma warning restore 168
+ {
+ // Expected
+ }
+ }
+
+ //[Test, LuceneNetSpecific]
+ //public void test_nextElement_NPE()
+ //{
+ // StringTokenizer stringTokenizer = new StringTokenizer(new string(),
+ // (String)null, true);
+ // try
+ // {
+ // stringTokenizer.NextToken();
+ // fail("should throw NullPointerException");
+ // }
+ // catch (ArgumentNullException e)
+ // {
+ // // Expected
+ // }
+
+ // stringTokenizer = new StringTokenizer(new String(), (String)null);
+ // try
+ // {
+ // stringTokenizer.NextToken();
+ // fail("should throw NullPointerException");
+ // }
+ // catch (ArgumentNullException e)
+ // {
+ // // Expected
+ // }
+ //}
+
+ [Test, LuceneNetSpecific]
+ public void Test_nextToken_NPE()
+ {
+ StringTokenizer stringTokenizer = new StringTokenizer("",
+ (String)null, true);
+ try
+ {
+ stringTokenizer.NextToken();
+ fail("should throw NullPointerException");
+ }
+#pragma warning disable 168
+ catch (ArgumentNullException e)
+#pragma warning restore 168
+ {
+ // Expected
+ }
+
+ stringTokenizer = new StringTokenizer("", (String)null);
+ try
+ {
+ stringTokenizer.NextToken();
+ fail("should throw NullPointerException");
+ }
+#pragma warning disable 168
+ catch (ArgumentNullException e)
+#pragma warning restore 168
+ {
+ // Expected
+ }
+ }
+
+ [Test, LuceneNetSpecific]
+ public void Test_nextTokenLjava_lang_String_NPE()
+ {
+ StringTokenizer stringTokenizer = new StringTokenizer("");
+ try
+ {
+ stringTokenizer.NextToken(null);
+ fail("should throw NullPointerException");
+ }
+#pragma warning disable 168
+ catch (ArgumentNullException e)
+#pragma warning restore 168
+ {
+ // Expected
+ }
+ }
+
+ /**
+ * Sets up the fixture, for example, open a network connection. This method
+ * is called before a test is executed.
+ */
+ public override void SetUp()
+ {
+ base.SetUp();
+ }
+
+ /**
+ * Tears down the fixture, for example, close a network connection. This
+ * method is called after a test is executed.
+ */
+ public override void TearDown()
+ {
+ base.TearDown();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eaf47793/src/Lucene.Net/Support/StringTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/Support/StringTokenizer.cs b/src/Lucene.Net/Support/StringTokenizer.cs
index 46843d3..1ab0db4 100644
--- a/src/Lucene.Net/Support/StringTokenizer.cs
+++ b/src/Lucene.Net/Support/StringTokenizer.cs
@@ -1,254 +1,270 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
+// This class was sourced from the Apache Harmony project
+// https://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/
+
+using System;
namespace Lucene.Net.Support
{
/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- public sealed class StringTokenizer
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// The <see cref="StringTokenizer"/> class allows an application to break a string
+ /// into tokens by performing code point comparison. The <see cref="StringTokenizer"/>
+ /// methods do not distinguish among identifiers, numbers, and quoted strings,
+ /// nor do they recognize and skip comments.
+ /// </summary>
+ /// <remarks>
+ /// The set of delimiters (the codepoints that separate tokens) may be specified
+ /// either at creation time or on a per-token basis.
+ /// <p>
+ /// An instance of <see cref="StringTokenizer"/> behaves in one of three ways,
+ /// depending on whether it was created with the <c>returnDelimiters</c> flag
+ /// having the value <c>true</c> or <c>false</c>:
+ /// <list type="bullet">
+ /// <item><description>If returnDelims is <c>false</c>, delimiter code points serve to separate
+ /// tokens. A token is a maximal sequence of consecutive code points that are not
+ /// delimiters.</description></item>
+ /// <item><description>If returnDelims is <c>true</c>, delimiter code points are themselves
+ /// considered to be tokens. In this case a token will be received for each
+ /// delimiter code point.</description></item>
+ /// </list>
+ /// <para/>
+ /// A token is thus either one delimiter code point, or a maximal sequence of
+ /// consecutive code points that are not delimiters.
+ /// <para/>
+ /// A <see cref="StringTokenizer"/> object internally maintains a current position
+ /// within the string to be tokenized. Some operations advance this current
+ /// position past the code point processed.
+ /// <para/>
+ /// A token is returned by taking a substring of the string that was used to
+ /// create the <see cref="StringTokenizer"/> object.
+ /// <para/>
+ /// Here's an example of the use of the default delimiter <see cref="StringTokenizer"/>:
+ ///
+ /// <code>
+ /// StringTokenizer st = new StringTokenizer("this is a test");
+ /// while (st.HasMoreTokens()) {
+ /// println(st.NextToken());
+ /// }
+ /// </code>
+ ///
+ /// <para/>
+ /// This prints the following output:
+ ///
+ /// <code>
+ /// this
+ /// is
+ /// a
+ /// test
+ /// </code>
+ ///
+ /// <para/>
+ /// Here's an example of how to use a <see cref="StringTokenizer"/> with a user
+ /// specified delimiter:
+ ///
+ /// <code>
+ /// StringTokenizer st = new StringTokenizer(
+ /// "this is a test with supplementary characters \ud800\ud800\udc00\udc00",
+ /// " \ud800\udc00");
+ /// while (st.HasMoreTokens()) {
+ /// println(st.NextToken());
+ /// }
+ /// </code>
+ ///
+ /// <para/>
+ /// This prints the following output:
+ ///
+ /// <code>
+ /// this
+ /// is
+ /// a
+ /// test
+ /// with
+ /// supplementary
+ /// characters
+ /// \ud800
+ /// \udc00
+ /// </code>
+ ///
+ /// </remarks>
+ public class StringTokenizer
{
- private readonly TokenParsingStrategy _strategy;
-
- private const int _preprocessThreshold = 1024; // 1024 chars -- TODO: empirically determine best threshold
-
- public StringTokenizer(string str, string delim, bool returnDelims)
- {
- if (str == null)
- throw new ArgumentNullException("str");
-
- if (string.IsNullOrEmpty(delim))
- throw new ArgumentException("No delimiter characters given!");
-
- var delimSet = new HashSet<char>(delim.ToCharArray());
-
- if (str.Length > _preprocessThreshold)
- {
- _strategy = new StringBuilderTokenParsingStrategy(str, delimSet, returnDelims);
- }
- else
- {
- _strategy = new PreProcessTokenParsingStrategy(str, delimSet, returnDelims);
- }
- }
-
- public StringTokenizer(string str, string delim)
- : this(str, delim, false)
- {
- }
-
+ private string str;
+ private string delimiters;
+ private bool returnDelimiters;
+ private int position;
+
+ /// <summary>
+ /// Constructs a new <see cref="StringTokenizer"/> for the parameter string using
+ /// whitespace as the delimiter. The <see cref="returnDelimiters"/> flag is set to
+ /// <c>false</c>.
+ /// </summary>
+ /// <param name="str">The string to be tokenized.</param>
public StringTokenizer(string str)
: this(str, " \t\n\r\f", false)
{
}
- public bool HasMoreTokens()
- {
- return _strategy.HasMoreTokens();
- }
-
- public string NextToken()
- {
- return _strategy.NextToken();
- }
-
- //public string NextToken(string delim)
- //{
- // if (string.IsNullOrEmpty(delim))
- // throw new ArgumentException("No delimiter characters given!");
-
- // _delims.Clear();
- // _delims.UnionWith(delim.ToCharArray());
-
- // return NextToken();
- //}
-
- public int CountTokens()
+ /// <summary>
+ /// Constructs a new <see cref="StringTokenizer"/> for the parameter string using
+ /// the specified delimiters. The <see cref="returnDelimiters"/> flag is set to
+ /// <c>false</c>. If <paramref name="delimiters"/> is <c>null</c>, this constructor
+ /// doesn't throw an <see cref="Exception"/>, but later calls to some methods might
+ /// throw an <see cref="ArgumentNullException"/> or <see cref="InvalidOperationException"/>.
+ /// </summary>
+ /// <param name="str">The string to be tokenized.</param>
+ /// <param name="delimiters">The delimiters to use.</param>
+ public StringTokenizer(string str, string delimiters)
+ : this(str, delimiters, false)
{
- return _strategy.CountTokens();
}
- private abstract class TokenParsingStrategy
+ /// <summary>
+ /// Constructs a new <see cref="StringTokenizer"/> for the parameter string using
+ /// the specified delimiters, returning the delimiters as tokens if the
+ /// parameter <paramref name="returnDelimiters"/> is <c>true</c>. If <paramref name="delimiters"/>
+ /// is null this constructor doesn't throw an <see cref="Exception"/>, but later
+ /// calls to some methods might throw an <see cref="ArgumentNullException"/> or <see cref="InvalidOperationException"/>.
+ /// </summary>
+ /// <param name="str">The string to be tokenized.</param>
+ /// <param name="delimiters">The delimiters to use.</param>
+ /// <param name="returnDelimiters"><c>true</c> to return each delimiter as a token.</param>
+ public StringTokenizer(string str, string delimiters,
+ bool returnDelimiters)
{
- public abstract bool HasMoreTokens();
-
- public abstract string NextToken();
-
- public abstract int CountTokens();
- }
-
- private class StringBuilderTokenParsingStrategy : TokenParsingStrategy
- {
- private readonly string _str;
- private readonly ISet<char> _delims;
- private readonly bool _returnDelims;
-
- private int _position = 0;
-
- public StringBuilderTokenParsingStrategy(string str, ISet<char> delims, bool returnDelims)
+ if (str != null)
{
- _str = str;
- _delims = delims;
- _returnDelims = returnDelims;
+ this.str = str;
+ this.delimiters = delimiters;
+ this.returnDelimiters = returnDelimiters;
+ this.position = 0;
}
+ else
+ throw new ArgumentNullException("str");
+ }
- public override bool HasMoreTokens()
+ /// <summary>
+ /// Returns the number of unprocessed tokens remaining in the string.
+ /// </summary>
+ /// <returns>number of tokens that can be retrieved before an
+ /// <see cref="Exception"/> will result from a call to <see cref="NextToken()"/>.</returns>
+ public virtual int CountTokens()
+ {
+ int count = 0;
+ bool inToken = false;
+ for (int i = position, length = str.Length; i < length; i++)
{
- if (_position >= _str.Length)
- return false;
-
- if (_returnDelims)
- return true; // since we're not at end of string, there has to be a token left if returning delimiters
-
- for (int i = _position; i < _str.Length; i++)
+ if (delimiters.IndexOf(str[i], 0) >= 0)
{
- if (!_delims.Contains(_str[i]))
- return true;
- }
-
- return false; // only delims left
- }
-
- public override string NextToken()
- {
- if (_position >= _str.Length)
- throw new InvalidOperationException("Past end of string.");
-
- if (_returnDelims && _delims.Contains(_str[_position]))
- {
- _position++;
- return _str[_position].ToString();
- }
-
- StringBuilder sb = new StringBuilder();
-
- for (int i = _position; i < _str.Length; i++)
- {
- char c = _str[i];
-
- _position = i;
-
- if (_delims.Contains(c))
+ if (returnDelimiters)
+ count++;
+ if (inToken)
{
- break;
- }
- else
- {
- sb.Append(c);
+ count++;
+ inToken = false;
}
}
-
- return sb.ToString();
- }
-
- public override int CountTokens()
- {
- if (_position >= _str.Length)
- return 0;
-
- int count = 0;
- bool lastWasDelim = true; // consider start of string/substring a delim
-
- for (int i = _position; i < _str.Length; i++)
+ else
{
- char c = _str[i];
-
- if (_delims.Contains(c))
- {
- if (!lastWasDelim)
- count++; // increase since now we're at a delim
-
- lastWasDelim = true;
-
- if (_returnDelims)
- count++; // this delim counts as a token
- }
- else
- {
- lastWasDelim = false;
- }
+ inToken = true;
}
-
- if (!lastWasDelim)
- count++; // string ended with non-delim
-
- return count;
}
+ if (inToken)
+ count++;
+ return count;
}
- private class PreProcessTokenParsingStrategy : TokenParsingStrategy
+ /// <summary>
+ /// Returns <c>true</c> if unprocessed tokens remain.
+ /// </summary>
+ /// <returns><c>true</c> if unprocessed tokens remain.</returns>
+ public bool HasMoreTokens()
{
- private readonly string _str;
- private readonly ISet<char> _delims;
- private readonly bool _returnDelims;
- private readonly List<string> _tokens = new List<string>();
- private int _index = 0;
-
- public PreProcessTokenParsingStrategy(string str, ISet<char> delims, bool returnDelims)
+ if (delimiters == null)
{
- _str = str;
- _delims = delims;
- _returnDelims = returnDelims;
-
- Preprocess();
+ throw new ArgumentNullException("delimiters");
}
-
- private void Preprocess()
+ int length = str.Length;
+ if (position < length)
{
- StringBuilder sb = new StringBuilder();
-
- foreach (char c in _str)
- {
- if (_delims.Contains(c))
- {
- if (sb.Length > 0)
- {
- _tokens.Add(sb.ToString());
- sb.Clear();
- }
+ if (returnDelimiters)
+ return true; // there is at least one character and even if
+ // it is a delimiter it is a token
- if (_returnDelims)
- _tokens.Add(c.ToString());
- }
- else
- {
- sb.Append(c);
- }
- }
-
- if (sb.Length > 0)
- _tokens.Add(sb.ToString());
+ // otherwise find a character which is not a delimiter
+ for (int i = position; i < length; i++)
+ if (delimiters.IndexOf(str[i], 0) == -1)
+ return true;
}
+ return false;
+ }
- public override bool HasMoreTokens()
+ /// <summary>
+ /// Returns the next token in the string as a <see cref="string"/>.
+ /// </summary>
+ /// <returns>Next token in the string as a <see cref="string"/>.</returns>
+ /// <exception cref="InvalidOperationException">If no tokens remain.</exception>
+ public string NextToken()
+ {
+ if (delimiters == null)
{
- return _index < _tokens.Count;
+ throw new ArgumentNullException();
}
+ int i = position;
+ int length = str.Length;
- public override string NextToken()
+ if (i < length)
{
- return _tokens[_index++];
- }
+ if (returnDelimiters)
+ {
+ if (delimiters.IndexOf(str[position], 0) >= 0)
+ return str[position++].ToString();
+ for (position++; position < length; position++)
+ if (delimiters.IndexOf(str[position], 0) >= 0)
+ return str.Substring(i, position - i);
+ return str.Substring(i);
+ }
- public override int CountTokens()
- {
- return _tokens.Count - _index;
+ while (i < length && delimiters.IndexOf(str[i], 0) >= 0)
+ i++;
+ position = i;
+ if (i < length)
+ {
+ for (position++; position < length; position++)
+ if (delimiters.IndexOf(str[position], 0) >= 0)
+ return str.Substring(i, position - i);
+ return str.Substring(i);
+ }
}
+ throw new InvalidOperationException("No more elements");
+ }
+
+ /// <summary>
+ /// Returns the next token in the string as a <see cref="string"/>. The delimiters
+ /// used are changed to the specified delimiters.
+ /// </summary>
+ /// <param name="delims">The new delimiters to use.</param>
+ /// <returns>Next token in the string as a <see cref="string"/>.</returns>
+ /// <exception cref="InvalidOperationException">If no tokens remain.</exception>
+ public string NextToken(string delims)
+ {
+ this.delimiters = delims;
+ return NextToken();
}
}
}
\ No newline at end of file