You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/06/10 00:03:44 UTC

svn commit: r1134106 - /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java

Author: joern
Date: Thu Jun  9 22:03:44 2011
New Revision: 1134106

URL: http://svn.apache.org/viewvc?rev=1134106&view=rev
Log:
OPENNLP-202 Improved whitespace detection: use StringUtil.isWhitespace instead of comparing only against ' ' and '\n'

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java?rev=1134106&r1=1134105&r2=1134106&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java Thu Jun  9 22:03:44 2011
@@ -229,11 +229,11 @@ public class DefaultSDContextGenerator i
    */
   private static final int previousSpaceIndex(CharSequence sb, int seek) {
     seek--;
-    while (seek > 0 && sb.charAt(seek) != ' ') {
+    while (seek > 0 && !StringUtil.isWhitespace(sb.charAt(seek))) {
       seek--;
     }
-    if (seek > 0 && sb.charAt(seek) == ' ') {
-      while (seek > 0 && sb.charAt(seek - 1) == ' ')
+    if (seek > 0 && StringUtil.isWhitespace(sb.charAt(seek))) {
+      while (seek > 0 && StringUtil.isWhitespace(sb.charAt(seek - 1)))
         seek--;
       return seek;
     }
@@ -253,8 +253,8 @@ public class DefaultSDContextGenerator i
     char c;
     while (seek < lastIndex) {
       c = sb.charAt(seek);
-      if (c == ' ' || c == '\n') {
-        while (sb.length() > seek + 1 && sb.charAt(seek + 1) == ' ')
+      if (StringUtil.isWhitespace(c)) {
+        while (sb.length() > seek + 1 && StringUtil.isWhitespace(sb.charAt(seek + 1)))
           seek++;
         return seek;
       }