You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/06 10:09:54 UTC
[10/21] opennlp git commit: OPENNLP-1076: Add validation of spans to SentenceSample
OPENNLP-1076: Add validation of spans to SentenceSample
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d378c065
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d378c065
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d378c065
Branch: refs/heads/LangDetect
Commit: d378c0656ff2374a867abe0383aa841275a47d8d
Parents: 226612f
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed May 24 12:10:37 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Wed May 24 12:10:37 2017 +0200
----------------------------------------------------------------------
.../main/java/opennlp/tools/sentdetect/SentenceSample.java | 9 +++++++++
.../java/opennlp/tools/sentdetect/SentenceSampleTest.java | 7 ++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
index dbbd193..7891cfd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
@@ -45,6 +45,15 @@ public class SentenceSample {
public SentenceSample(CharSequence document, Span... sentences) {
this.document = document.toString();
this.sentences = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(sentences)));
+
+ // validate that all spans are inside the document text
+ for (Span sentence : sentences) {
+ if (sentence.getEnd() > document.length()) {
+ throw new IllegalArgumentException(
+ String.format("Sentence span is outside of document text [len %d] and span %s",
+ document.length(), sentence));
+ }
+ }
}
public SentenceSample(Detokenizer detokenizer, String[][] sentences) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
index 163cb73..2ec0978 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
@@ -29,7 +29,6 @@ public class SentenceSampleTest {
@Test
public void testRetrievingContent() {
-
SentenceSample sample = new SentenceSample("1. 2.",
new Span(0, 2), new Span(3, 5));
@@ -38,6 +37,12 @@ public class SentenceSampleTest {
Assert.assertEquals(new Span(3, 5), sample.getSentences()[1]);
}
+ @Test(expected = IllegalArgumentException.class)
+ public void testInvalidSpansFailFast() {
+ SentenceSample sample = new SentenceSample("1. 2.",
+ new Span(0, 2), new Span(5, 7));
+ }
+
@Test
public void testEquals() {
Assert.assertFalse(createGoldSample() == createGoldSample());