You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/06 10:09:54 UTC

[10/21] opennlp git commit: OPENNLP-1076: Add validation of spans to SentenceSample

OPENNLP-1076: Add validation of spans to SentenceSample


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d378c065
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d378c065
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d378c065

Branch: refs/heads/LangDetect
Commit: d378c0656ff2374a867abe0383aa841275a47d8d
Parents: 226612f
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed May 24 12:10:37 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Wed May 24 12:10:37 2017 +0200

----------------------------------------------------------------------
 .../main/java/opennlp/tools/sentdetect/SentenceSample.java  | 9 +++++++++
 .../java/opennlp/tools/sentdetect/SentenceSampleTest.java   | 7 ++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
index dbbd193..7891cfd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
@@ -45,6 +45,15 @@ public class SentenceSample {
   public SentenceSample(CharSequence document, Span... sentences) {
     this.document = document.toString();
     this.sentences = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(sentences)));
+
+    // fail fast if a span ends past the document text; only the end offset is checked here
+    for (Span sentence : sentences) {
+      if (sentence.getEnd() > document.length()) {
+        throw new IllegalArgumentException(
+            String.format("Sentence span is outside of document text [len %d] and span %s",
+            document.length(), sentence));
+      }
+    }
   }
 
   public SentenceSample(Detokenizer detokenizer, String[][] sentences) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
index 163cb73..2ec0978 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
@@ -29,7 +29,6 @@ public class SentenceSampleTest {
 
   @Test
   public void testRetrievingContent() {
-
     SentenceSample sample = new SentenceSample("1. 2.",
         new Span(0, 2), new Span(3, 5));
 
@@ -38,6 +37,12 @@ public class SentenceSampleTest {
     Assert.assertEquals(new Span(3, 5), sample.getSentences()[1]);
   }
 
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidSpansFailFast() {
+    SentenceSample sample = new SentenceSample("1. 2.",
+        new Span(0, 2), new Span(5, 7)); // Span(5, 7) reaches past the 5-character text
+  }
+
   @Test
   public void testEquals() {
     Assert.assertFalse(createGoldSample() == createGoldSample());