You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/06/10 00:03:44 UTC
svn commit: r1134106 -
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
Author: joern
Date: Thu Jun 9 22:03:44 2011
New Revision: 1134106
URL: http://svn.apache.org/viewvc?rev=1134106&view=rev
Log:
OPENNLP-202 Improved white space detection
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java?rev=1134106&r1=1134105&r2=1134106&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java Thu Jun 9 22:03:44 2011
@@ -229,11 +229,11 @@ public class DefaultSDContextGenerator i
*/
private static final int previousSpaceIndex(CharSequence sb, int seek) {
seek--;
- while (seek > 0 && sb.charAt(seek) != ' ') {
+ while (seek > 0 && !StringUtil.isWhitespace(sb.charAt(seek))) {
seek--;
}
- if (seek > 0 && sb.charAt(seek) == ' ') {
- while (seek > 0 && sb.charAt(seek - 1) == ' ')
+ if (seek > 0 && StringUtil.isWhitespace(sb.charAt(seek))) {
+ while (seek > 0 && StringUtil.isWhitespace(sb.charAt(seek - 1)))
seek--;
return seek;
}
@@ -253,8 +253,8 @@ public class DefaultSDContextGenerator i
char c;
while (seek < lastIndex) {
c = sb.charAt(seek);
- if (c == ' ' || c == '\n') {
- while (sb.length() > seek + 1 && sb.charAt(seek + 1) == ' ')
+ if (StringUtil.isWhitespace(c)) {
+ while (sb.length() > seek + 1 && StringUtil.isWhitespace(sb.charAt(seek + 1)))
seek++;
return seek;
}