You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/01/27 15:21:47 UTC

svn commit: r1561692 - /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java

Author: joern
Date: Mon Jan 27 14:21:47 2014
New Revision: 1561692

URL: http://svn.apache.org/r1561692
Log:
OPENNLP-602 Changed whitespace sentence handling. Thanks to Tim Miller for providing a patch.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java?rev=1561692&r1=1561691&r2=1561692&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java Mon Jan 27 14:21:47 2014
@@ -176,7 +176,8 @@ public class SentenceDetectorME implemen
       if (i + 1 < end && enders.get(i + 1) < fws) {
         continue;
       }
-
+      if(positions.size() > 0 && cint < positions.get(positions.size()-1)) continue;
+      
       double[] probs = model.eval(cgen.getContext(sb, cint));
       String bestOutcome = model.getBestOutcome(probs);
 
@@ -223,7 +224,7 @@ public class SentenceDetectorME implemen
     // Convert the sentence end indexes to spans
     
     boolean leftover = starts[starts.length - 1] != s.length();
-    List<Span> spans = new ArrayList<Span>(leftover? starts.length + 1 : starts.length);
+    Span[] spans = new Span[leftover? starts.length + 1 : starts.length];
     
     for (int si=0; si < starts.length; si++) {
       int start;
@@ -239,7 +240,7 @@ public class SentenceDetectorME implemen
       // the span will be zero after trimming and should be ignored.
       Span span = new Span(start, starts[si]).trim(s);
       if (span.length() > 0) {
-        spans.add(span);
+        spans[si] = span;
       }
       else {
         sentProbs.remove(si);
@@ -249,12 +250,12 @@ public class SentenceDetectorME implemen
     if (leftover) {
       Span span = new Span(starts[starts.length-1],s.length()).trim(s);
       if (span.length() > 0) {
-        spans.add(span);
+        spans[spans.length-1] = span;
         sentProbs.add(1d);
       }
     }
     
-    return spans.toArray(new Span[spans.size()]);
+    return spans;
   }
 
   /**