You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/08 11:16:09 UTC

svn commit: r1166581 - in /incubator/opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/sentdetect/SentenceDetectorME.java test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java

Author: joern
Date: Thu Sep  8 09:16:08 2011
New Revision: 1166581

URL: http://svn.apache.org/viewvc?rev=1166581&view=rev
Log:
OPENNLP-295 Fixed probability array creation in the case that the sentence does not contain an end-of-sentence character.

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java?rev=1166581&r1=1166580&r2=1166581&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java Thu Sep  8 09:16:08 2011
@@ -148,7 +148,6 @@ public class SentenceDetectorME implemen
    *
    */
   public Span[] sentPosDetect(String s) {
-    double sentProb = 1;
     sentProbs.clear();
     StringBuffer sb = new StringBuffer(s);
     List<Integer> enders = scanner.getPositions(s);
@@ -165,7 +164,6 @@ public class SentenceDetectorME implemen
 
       double[] probs = model.eval(cgen.getContext(sb, cint));
       String bestOutcome = model.getBestOutcome(probs);
-      sentProb *= probs[model.getIndex(bestOutcome)];
 
       if (bestOutcome.equals(SPLIT) && isAcceptableBreak(s, index, cint)) {
         if (index != cint) {
@@ -199,8 +197,10 @@ public class SentenceDetectorME implemen
         while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1)))
           end--;
         
-        if ((end - start) > 0)
+        if ((end - start) > 0) {
+          sentProbs.add(1d);
           return new Span[] {new Span(start, end)};
+        }
         else 
           return new Span[0];
     }
@@ -225,6 +225,7 @@ public class SentenceDetectorME implemen
       }
       spans[si]=new Span(start,end);
     }
+    
     if (leftover) {
       spans[spans.length-1] = new Span(starts[starts.length-1],s.length());
       sentProbs.add(ONE);

Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java?rev=1166581&r1=1166580&r2=1166581&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java Thu Sep  8 09:16:08 2011
@@ -55,6 +55,7 @@ public class SentenceDetectorMETest {
     assertEquals(sents[1],"There are many tests, this is the second.");
     double[] probs = sentDetect.getSentenceProbabilities();
     assertEquals(probs.length,2);
+    
     String sampleSentences2 = "This is a test. There are many tests, this is the second";
     sents = sentDetect.sentDetect(sampleSentences2);
     assertEquals(sents.length,2);
@@ -62,9 +63,7 @@ public class SentenceDetectorMETest {
     assertEquals(probs.length,2);
     assertEquals(sents[0],"This is a test.");
     assertEquals(sents[1],"There are many tests, this is the second");
-    assertEquals(sents.length,2);
-    probs = sentDetect.getSentenceProbabilities();
-    assertEquals(probs.length,2);
+    
     String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";
     sents = sentDetect.sentDetect(sampleSentences3);
     assertEquals(sents.length,2);
@@ -72,6 +71,7 @@ public class SentenceDetectorMETest {
     assertEquals(probs.length,2);
     assertEquals(sents[0],"This is a \"test\".");
     assertEquals(sents[1],"He said \"There are many tests, this is the second.\"");
+    
     String sampleSentences4 = "This is a \"test\". I said \"This is a test.\"  Any questions?";
     sents = sentDetect.sentDetect(sampleSentences4);
     assertEquals(sents.length,3);
@@ -80,29 +80,39 @@ public class SentenceDetectorMETest {
     assertEquals(sents[0],"This is a \"test\".");
     assertEquals(sents[1],"I said \"This is a test.\"");
     assertEquals(sents[2],"Any questions?");
+    
     String sampleSentences5 = "This is a one sentence test space at the end.    ";
     sents = sentDetect.sentDetect(sampleSentences5);
     assertEquals(1, sentDetect.getSentenceProbabilities().length);
     assertEquals(sents[0],"This is a one sentence test space at the end.");
+    
     String sampleSentences6 = "This is a one sentences test with tab at the end.            ";
     sents = sentDetect.sentDetect(sampleSentences6);
     assertEquals(sents[0],"This is a one sentences test with tab at the end.");
+    
     String sampleSentences7 = "This is a test.    With spaces between the two sentences.";
     sents = sentDetect.sentDetect(sampleSentences7);
     assertEquals(sents[0],"This is a test.");
     assertEquals(sents[1],"With spaces between the two sentences.");
+    
     String sampleSentences9 = "";
     sents = sentDetect.sentDetect(sampleSentences9);
     assertEquals(0, sents.length);
+    
     String sampleSentences10 = "               "; // whitespaces and tabs
     sents = sentDetect.sentDetect(sampleSentences10);
     assertEquals(0, sents.length);
+    
     String sampleSentences11 = "This is test sentence without a dot at the end and spaces          ";
     sents = sentDetect.sentDetect(sampleSentences11);
     assertEquals(sents[0],"This is test sentence without a dot at the end and spaces");
+    probs = sentDetect.getSentenceProbabilities();
+    assertEquals(1, probs.length);
+    
     String sampleSentence12 = "    This is a test.";
     sents = sentDetect.sentDetect(sampleSentence12);
     assertEquals(sents[0],"This is a test.");    
+    
     String sampleSentence13 = " This is a test";
     sents = sentDetect.sentDetect(sampleSentence13);
     assertEquals(sents[0],"This is a test");
@@ -114,5 +124,6 @@ public class SentenceDetectorMETest {
     assertEquals(probs.length,2);
     assertEquals(new Span(0, 15), pos[0]);
     assertEquals(new Span(16, 56), pos[1]);
+    
   }
 }