You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/08 11:16:09 UTC
svn commit: r1166581 - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
Author: joern
Date: Thu Sep 8 09:16:08 2011
New Revision: 1166581
URL: http://svn.apache.org/viewvc?rev=1166581&view=rev
Log:
OPENNLP-295 Fixed creation of the sentence probability array when the input does not contain an end-of-sentence character: a probability of 1 is now recorded for the single sentence that is returned.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java?rev=1166581&r1=1166580&r2=1166581&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java Thu Sep 8 09:16:08 2011
@@ -148,7 +148,6 @@ public class SentenceDetectorME implemen
*
*/
public Span[] sentPosDetect(String s) {
- double sentProb = 1;
sentProbs.clear();
StringBuffer sb = new StringBuffer(s);
List<Integer> enders = scanner.getPositions(s);
@@ -165,7 +164,6 @@ public class SentenceDetectorME implemen
double[] probs = model.eval(cgen.getContext(sb, cint));
String bestOutcome = model.getBestOutcome(probs);
- sentProb *= probs[model.getIndex(bestOutcome)];
if (bestOutcome.equals(SPLIT) && isAcceptableBreak(s, index, cint)) {
if (index != cint) {
@@ -199,8 +197,10 @@ public class SentenceDetectorME implemen
while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1)))
end--;
- if ((end - start) > 0)
+ if ((end - start) > 0) {
+ sentProbs.add(1d);
return new Span[] {new Span(start, end)};
+ }
else
return new Span[0];
}
@@ -225,6 +225,7 @@ public class SentenceDetectorME implemen
}
spans[si]=new Span(start,end);
}
+
if (leftover) {
spans[spans.length-1] = new Span(starts[starts.length-1],s.length());
sentProbs.add(ONE);
Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java?rev=1166581&r1=1166580&r2=1166581&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java Thu Sep 8 09:16:08 2011
@@ -55,6 +55,7 @@ public class SentenceDetectorMETest {
assertEquals(sents[1],"There are many tests, this is the second.");
double[] probs = sentDetect.getSentenceProbabilities();
assertEquals(probs.length,2);
+
String sampleSentences2 = "This is a test. There are many tests, this is the second";
sents = sentDetect.sentDetect(sampleSentences2);
assertEquals(sents.length,2);
@@ -62,9 +63,7 @@ public class SentenceDetectorMETest {
assertEquals(probs.length,2);
assertEquals(sents[0],"This is a test.");
assertEquals(sents[1],"There are many tests, this is the second");
- assertEquals(sents.length,2);
- probs = sentDetect.getSentenceProbabilities();
- assertEquals(probs.length,2);
+
String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";
sents = sentDetect.sentDetect(sampleSentences3);
assertEquals(sents.length,2);
@@ -72,6 +71,7 @@ public class SentenceDetectorMETest {
assertEquals(probs.length,2);
assertEquals(sents[0],"This is a \"test\".");
assertEquals(sents[1],"He said \"There are many tests, this is the second.\"");
+
String sampleSentences4 = "This is a \"test\". I said \"This is a test.\" Any questions?";
sents = sentDetect.sentDetect(sampleSentences4);
assertEquals(sents.length,3);
@@ -80,29 +80,39 @@ public class SentenceDetectorMETest {
assertEquals(sents[0],"This is a \"test\".");
assertEquals(sents[1],"I said \"This is a test.\"");
assertEquals(sents[2],"Any questions?");
+
String sampleSentences5 = "This is a one sentence test space at the end. ";
sents = sentDetect.sentDetect(sampleSentences5);
assertEquals(1, sentDetect.getSentenceProbabilities().length);
assertEquals(sents[0],"This is a one sentence test space at the end.");
+
String sampleSentences6 = "This is a one sentences test with tab at the end. ";
sents = sentDetect.sentDetect(sampleSentences6);
assertEquals(sents[0],"This is a one sentences test with tab at the end.");
+
String sampleSentences7 = "This is a test. With spaces between the two sentences.";
sents = sentDetect.sentDetect(sampleSentences7);
assertEquals(sents[0],"This is a test.");
assertEquals(sents[1],"With spaces between the two sentences.");
+
String sampleSentences9 = "";
sents = sentDetect.sentDetect(sampleSentences9);
assertEquals(0, sents.length);
+
String sampleSentences10 = " "; // whitespaces and tabs
sents = sentDetect.sentDetect(sampleSentences10);
assertEquals(0, sents.length);
+
String sampleSentences11 = "This is test sentence without a dot at the end and spaces ";
sents = sentDetect.sentDetect(sampleSentences11);
assertEquals(sents[0],"This is test sentence without a dot at the end and spaces");
+ probs = sentDetect.getSentenceProbabilities();
+ assertEquals(1, probs.length);
+
String sampleSentence12 = " This is a test.";
sents = sentDetect.sentDetect(sampleSentence12);
assertEquals(sents[0],"This is a test.");
+
String sampleSentence13 = " This is a test";
sents = sentDetect.sentDetect(sampleSentence13);
assertEquals(sents[0],"This is a test");
@@ -114,5 +124,6 @@ public class SentenceDetectorMETest {
assertEquals(probs.length,2);
assertEquals(new Span(0, 15), pos[0]);
assertEquals(new Span(16, 56), pos[1]);
+
}
}