You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2022/12/23 20:47:56 UTC
[opennlp] branch main updated: OPENNLP-1421 Improve tests in opennlp.tools.formats package (#466)
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new ea040b59 OPENNLP-1421 Improve tests in opennlp.tools.formats package (#466)
ea040b59 is described below
commit ea040b59cfa002c8f3f06c0ecf6fc507db41578d
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Fri Dec 23 21:47:50 2022 +0100
OPENNLP-1421 Improve tests in opennlp.tools.formats package (#466)
- introduces common base test classes `Abstract...Test` to reduce code duplication for resource handling
- extracts (test) resource loading into separate methods
- adds further assertions for existing tests where useful
- adds final modifier where applicable
- adjusts `TokenSample` to implement `opennlp.tools.commons.Sample`
- fixes some typos
---
.../java/opennlp/tools/tokenize/TokenSample.java | 6 +-
...sStreamFactory.java => AbstractFormatTest.java} | 21 +++----
...mFactory.java => AbstractSampleStreamTest.java} | 17 +-----
.../tools/formats/Conll02NameSampleStreamTest.java | 69 ++++++++++-----------
.../tools/formats/Conll03NameSampleStreamTest.java | 71 ++++++++++------------
.../tools/formats/ConllXPOSSampleStreamTest.java | 9 +--
.../tools/formats/DirectorySampleStreamTest.java | 20 +++---
.../tools/formats/EvalitaNameSampleStreamTest.java | 51 ++++++++--------
.../formats/NameFinderCensus90NameStreamTest.java | 18 ++----
.../tools/formats/ResourceAsStreamFactory.java | 6 +-
.../tools/formats/ad/ADChunkSampleStreamTest.java | 37 +++++------
.../tools/formats/ad/ADNameSampleStreamTest.java | 35 +++++------
.../tools/formats/ad/ADPOSSampleStreamTest.java | 21 +++----
.../tools/formats/ad/ADParagraphStreamTest.java | 26 ++++----
.../formats/ad/ADSentenceSampleStreamTest.java | 39 +++++-------
.../tools/formats/ad/ADTokenSampleStreamTest.java | 36 +++++------
.../AbstractADSampleStreamTest.java} | 29 +++++----
.../AbstractBratTest.java} | 33 +++++-----
.../formats/brat/BratAnnotationStreamTest.java | 12 ++--
.../tools/formats/brat/BratDocumentParserTest.java | 10 ++-
.../tools/formats/brat/BratDocumentTest.java | 18 +++---
.../formats/brat/BratNameSampleStreamTest.java | 8 +--
.../AbstractConlluSampleStreamTest.java} | 25 ++++----
.../conllu/ConlluLemmaSampleStreamTest.java | 14 ++---
.../formats/conllu/ConlluPOSSampleStreamTest.java | 21 +++----
.../conllu/ConlluSentenceSampleStreamTest.java | 15 +++--
.../tools/formats/conllu/ConlluStreamTest.java | 16 ++---
.../conllu/ConlluTokenSampleStreamTest.java | 19 +++---
.../ConstitParseSampleStreamTest.java | 28 ++++-----
.../IrishSentenceBankDocumentTest.java | 6 +-
.../tools/formats/letsmt/LetsmtDocumentTest.java | 6 +-
.../AbstractMascSampleStreamTest.java} | 33 +++++-----
.../masc/MascNamedEntitySampleStreamTest.java | 52 ++++++----------
.../formats/masc/MascPOSSampleStreamTest.java | 47 +++++---------
.../formats/masc/MascSentenceSampleStreamTest.java | 35 +++++------
.../formats/masc/MascTokenSampleStreamTest.java | 46 +++++---------
.../opennlp/tools/formats/muc/SgmlParserTest.java | 8 ++-
.../formats/nkjp/NKJPSegmentationDocumentTest.java | 8 +--
.../tools/formats/nkjp/NKJPTextDocumentTest.java | 11 ++--
39 files changed, 420 insertions(+), 562 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
index 03f29470..3f3aab9c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
@@ -17,22 +17,22 @@
package opennlp.tools.tokenize;
-import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
+import opennlp.tools.commons.Sample;
import opennlp.tools.tokenize.Detokenizer.DetokenizationOperation;
import opennlp.tools.util.Span;
/**
* A {@link TokenSample} is text with token spans.
*/
-public class TokenSample implements Serializable {
+public class TokenSample implements Sample {
+ private static final long serialVersionUID = 8520715903833887047L;
- private static final long serialVersionUID = 1057011880085907705L;
public static final String DEFAULT_SEPARATOR_CHARS = "<SPLIT>";
private static final String separatorChars = DEFAULT_SEPARATOR_CHARS;
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/formats/AbstractFormatTest.java
similarity index 61%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/AbstractFormatTest.java
index 7e02fe79..bee7379a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/AbstractFormatTest.java
@@ -18,22 +18,21 @@
package opennlp.tools.formats;
import java.io.InputStream;
-import java.util.Objects;
+import java.net.URL;
-import opennlp.tools.util.InputStreamFactory;
+public abstract class AbstractFormatTest {
-public class ResourceAsStreamFactory implements InputStreamFactory {
+ protected static final String FORMATS_BASE_DIR = "/opennlp/tools/formats/";
- private Class<?> clazz;
- private String name;
+ protected URL getResource(String resource) {
+ return AbstractFormatTest.class.getResource(FORMATS_BASE_DIR + resource);
+ }
- public ResourceAsStreamFactory(Class<?> clazz, String name) {
- this.clazz = Objects.requireNonNull(clazz, "callz must not be null");
- this.name = Objects.requireNonNull(name, "name must not be null");
+ protected URL getResourceWithoutPrefix(String resource) {
+ return getClass().getClassLoader().getResource(resource);
}
- @Override
- public InputStream createInputStream() {
- return clazz.getResourceAsStream(name);
+ protected InputStream getResourceStream(String resource) {
+ return AbstractFormatTest.class.getResourceAsStream(FORMATS_BASE_DIR + resource);
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/formats/AbstractSampleStreamTest.java
similarity index 64%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/AbstractSampleStreamTest.java
index 7e02fe79..c154f21d 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/AbstractSampleStreamTest.java
@@ -17,23 +17,12 @@
package opennlp.tools.formats;
-import java.io.InputStream;
-import java.util.Objects;
-
import opennlp.tools.util.InputStreamFactory;
-public class ResourceAsStreamFactory implements InputStreamFactory {
-
- private Class<?> clazz;
- private String name;
- public ResourceAsStreamFactory(Class<?> clazz, String name) {
- this.clazz = Objects.requireNonNull(clazz, "callz must not be null");
- this.name = Objects.requireNonNull(name, "name must not be null");
- }
+public abstract class AbstractSampleStreamTest extends AbstractFormatTest {
- @Override
- public InputStream createInputStream() {
- return clazz.getResourceAsStream(name);
+ protected InputStreamFactory getFactory(String resource) {
+ return new ResourceAsStreamFactory(AbstractSampleStreamTest.class, FORMATS_BASE_DIR + resource);
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
index 8d86ed36..14806626 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
@@ -24,7 +24,6 @@ import org.junit.jupiter.api.Test;
import opennlp.tools.formats.Conll02NameSampleStream.LANGUAGE;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
@@ -32,62 +31,58 @@ import opennlp.tools.util.Span;
* Note:
* Sample training data must be UTF-8 encoded and uncompressed!
*/
-public class Conll02NameSampleStreamTest {
-
- private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(Conll02NameSampleStreamTest.class,
- "/opennlp/tools/formats/" + name);
-
- return new Conll02NameSampleStream(lang, in, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- }
+public class Conll02NameSampleStreamTest extends AbstractSampleStreamTest {
@Test
void testParsingSpanishSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample");
-
- NameSample personName = sampleStream.read();
-
- Assertions.assertNotNull(personName);
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample")) {
+ NameSample personName = sampleStream.read();
- Assertions.assertEquals(5, personName.getSentence().length);
- Assertions.assertEquals(1, personName.getNames().length);
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals(5, personName.getSentence().length);
+ Assertions.assertEquals(1, personName.getNames().length);
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Span nameSpan = personName.getNames()[0];
- Assertions.assertEquals(0, nameSpan.getStart());
- Assertions.assertEquals(4, nameSpan.getEnd());
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ Span nameSpan = personName.getNames()[0];
+ Assertions.assertEquals(0, nameSpan.getStart());
+ Assertions.assertEquals(4, nameSpan.getEnd());
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Assertions.assertEquals(0, sampleStream.read().getNames().length);
+ Assertions.assertEquals(0, sampleStream.read().getNames().length);
- Assertions.assertNull(sampleStream.read());
+ Assertions.assertNull(sampleStream.read());
+ }
}
@Test
void testParsingDutchSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample")) {
+ NameSample personName = sampleStream.read();
- NameSample personName = sampleStream.read();
+ Assertions.assertEquals(0, personName.getNames().length);
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Assertions.assertEquals(0, personName.getNames().length);
- Assertions.assertTrue(personName.isClearAdaptiveDataSet());
+ personName = sampleStream.read();
- personName = sampleStream.read();
-
- Assertions.assertFalse(personName.isClearAdaptiveDataSet());
-
- Assertions.assertNull(sampleStream.read());
+ Assertions.assertFalse(personName.isClearAdaptiveDataSet());
+ Assertions.assertNull(sampleStream.read());
+ }
}
@Test
void testReset() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
-
- NameSample sample = sampleStream.read();
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample")) {
+ NameSample sample = sampleStream.read();
+ sampleStream.reset();
- sampleStream.reset();
+ Assertions.assertEquals(sample, sampleStream.read());
+ }
+ }
- Assertions.assertEquals(sample, sampleStream.read());
+ private ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
+ return new Conll02NameSampleStream(
+ lang, getFactory(name), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}
+
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
index 5456dfe8..ddcb3065 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
@@ -24,51 +24,42 @@ import org.junit.jupiter.api.Test;
import opennlp.tools.formats.Conll03NameSampleStream.LANGUAGE;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
/**
* Test for the {@link Conll03NameSampleStream} class.
*/
-public class Conll03NameSampleStreamTest {
+public class Conll03NameSampleStreamTest extends AbstractSampleStreamTest {
private static final String ENGLISH_SAMPLE = "conll2003-en.sample";
private static final String GERMAN_SAMPLE = "conll2003-de.sample";
-
- private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(Conll03NameSampleStreamTest.class,
- "/opennlp/tools/formats/" + name);
-
- return new Conll03NameSampleStream(lang, in, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- }
-
@Test
void testParsingEnglishSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, ENGLISH_SAMPLE);
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, ENGLISH_SAMPLE)) {
+ NameSample personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
- NameSample personName = sampleStream.read();
- Assertions.assertNotNull(personName);
+ Assertions.assertEquals(9, personName.getSentence().length);
+ Assertions.assertEquals(0, personName.getNames().length);
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Assertions.assertEquals(9, personName.getSentence().length);
- Assertions.assertEquals(0, personName.getNames().length);
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ personName = sampleStream.read();
- personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
- Assertions.assertNotNull(personName);
+ Assertions.assertEquals(2, personName.getSentence().length);
+ Assertions.assertEquals(1, personName.getNames().length);
+ Assertions.assertFalse(personName.isClearAdaptiveDataSet());
- Assertions.assertEquals(2, personName.getSentence().length);
- Assertions.assertEquals(1, personName.getNames().length);
- Assertions.assertEquals(false, personName.isClearAdaptiveDataSet());
+ Span nameSpan = personName.getNames()[0];
+ Assertions.assertEquals(0, nameSpan.getStart());
+ Assertions.assertEquals(2, nameSpan.getEnd());
- Span nameSpan = personName.getNames()[0];
- Assertions.assertEquals(0, nameSpan.getStart());
- Assertions.assertEquals(2, nameSpan.getEnd());
-
- Assertions.assertNull(sampleStream.read());
+ Assertions.assertNull(sampleStream.read());
+ }
}
@Test
@@ -90,24 +81,28 @@ public class Conll03NameSampleStreamTest {
@Test
void testParsingGermanSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);
-
- NameSample personName = sampleStream.read();
- Assertions.assertNotNull(personName);
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE)) {
+ NameSample personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
- Assertions.assertEquals(5, personName.getSentence().length);
- Assertions.assertEquals(0, personName.getNames().length);
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ Assertions.assertEquals(5, personName.getSentence().length);
+ Assertions.assertEquals(0, personName.getNames().length);
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
+ }
}
@Test
void testReset() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);
+ try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE)) {
+ NameSample sample = sampleStream.read();
+ sampleStream.reset();
- NameSample sample = sampleStream.read();
-
- sampleStream.reset();
+ Assertions.assertEquals(sample, sampleStream.read());
+ }
+ }
- Assertions.assertEquals(sample, sampleStream.read());
+ private ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
+ return new Conll03NameSampleStream(
+ lang, getFactory(name), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
index 2bb3b67e..7d46dfe6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
@@ -24,18 +24,15 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import opennlp.tools.postag.POSSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
-public class ConllXPOSSampleStreamTest {
+public class ConllXPOSSampleStreamTest extends AbstractSampleStreamTest {
@Test
void testParsingSample() throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(ConllXPOSSampleStreamTest.class,
- "/opennlp/tools/formats/conllx.sample");
-
- try (ObjectStream<POSSample> sampleStream = new ConllXPOSSampleStream(in, StandardCharsets.UTF_8)) {
+ try (ObjectStream<POSSample> sampleStream = new ConllXPOSSampleStream(
+ getFactory("conllx.sample"), StandardCharsets.UTF_8)) {
POSSample a = sampleStream.read();
String[] aSentence = a.getSentence();
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
index 4c42e349..b49671fd 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/DirectorySampleStreamTest.java
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.UUID;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -34,12 +35,16 @@ public class DirectorySampleStreamTest {
@TempDir
Path tempDirectory;
-
+ private FileFilter filter;
+
+ @BeforeEach
+ public void setup() {
+ filter = new TempFileNameFilter();
+ }
+
@Test
public void directoryTest() throws IOException {
- FileFilter filter = new TempFileNameFilter();
-
List<File> files = new ArrayList<>();
File temp1 = createTempFile();
@@ -92,8 +97,6 @@ public class DirectorySampleStreamTest {
@Test
public void recursiveDirectoryTest() throws IOException {
- FileFilter filter = new TempFileNameFilter();
-
List<File> files = new ArrayList<>();
File temp1 = createTempFile();
@@ -121,8 +124,6 @@ public class DirectorySampleStreamTest {
@Test
public void resetDirectoryTest() throws IOException {
- FileFilter filter = new TempFileNameFilter();
-
List<File> files = new ArrayList<>();
File temp1 = createTempFile();
@@ -154,10 +155,7 @@ public class DirectorySampleStreamTest {
@Test
public void emptyDirectoryTest() throws IOException {
- FileFilter filter = new TempFileNameFilter();
-
DirectorySampleStream stream = new DirectorySampleStream(tempDirectory.toFile(), filter, false);
-
Assertions.assertNull(stream.read());
stream.close();
@@ -205,7 +203,7 @@ public class DirectorySampleStreamTest {
}
- class TempFileNameFilter implements FileFilter {
+ static class TempFileNameFilter implements FileFilter {
@Override
public boolean accept(File file) {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/EvalitaNameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/EvalitaNameSampleStreamTest.java
index ab52031c..37929e3e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/EvalitaNameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/EvalitaNameSampleStreamTest.java
@@ -24,7 +24,6 @@ import org.junit.jupiter.api.Test;
import opennlp.tools.formats.EvalitaNameSampleStream.LANGUAGE;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
@@ -32,43 +31,41 @@ import opennlp.tools.util.Span;
* Note:
* Sample training data must be UTF-8 encoded and uncompressed!
*/
-public class EvalitaNameSampleStreamTest {
-
- private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(EvalitaNameSampleStreamTest.class,
- "/opennlp/tools/formats/" + name);
-
- return new EvalitaNameSampleStream(lang, in, EvalitaNameSampleStream.GENERATE_PERSON_ENTITIES);
- }
+public class EvalitaNameSampleStreamTest extends AbstractSampleStreamTest {
@Test
void testParsingItalianSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.IT, "evalita-ner-it.sample");
-
- NameSample personName = sampleStream.read();
+ try (ObjectStream<NameSample> sampleStream = openData()) {
+ NameSample personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
- Assertions.assertNotNull(personName);
+ Assertions.assertEquals(11, personName.getSentence().length);
+ Assertions.assertEquals(1, personName.getNames().length);
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Assertions.assertEquals(11, personName.getSentence().length);
- Assertions.assertEquals(1, personName.getNames().length);
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ Span nameSpan = personName.getNames()[0];
+ Assertions.assertEquals(8, nameSpan.getStart());
+ Assertions.assertEquals(10, nameSpan.getEnd());
+ Assertions.assertTrue(personName.isClearAdaptiveDataSet());
- Span nameSpan = personName.getNames()[0];
- Assertions.assertEquals(8, nameSpan.getStart());
- Assertions.assertEquals(10, nameSpan.getEnd());
- Assertions.assertEquals(true, personName.isClearAdaptiveDataSet());
+ Assertions.assertEquals(0, sampleStream.read().getNames().length);
- Assertions.assertEquals(0, sampleStream.read().getNames().length);
-
- Assertions.assertNull(sampleStream.read());
+ Assertions.assertNull(sampleStream.read());
+ }
}
@Test
void testReset() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.IT, "evalita-ner-it.sample");
- NameSample sample = sampleStream.read();
- sampleStream.reset();
- Assertions.assertEquals(sample, sampleStream.read());
+ try (ObjectStream<NameSample> sampleStream = openData()) {
+ NameSample sample = sampleStream.read();
+ sampleStream.reset();
+ Assertions.assertEquals(sample, sampleStream.read());
+ }
+ }
+
+ private ObjectStream<NameSample> openData() throws IOException {
+ return new EvalitaNameSampleStream(LANGUAGE.IT, getFactory("evalita-ner-it.sample"),
+ EvalitaNameSampleStream.GENERATE_PERSON_ENTITIES);
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
index b5b3be05..8e04b929 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
@@ -23,25 +23,15 @@ import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.StringList;
-public class NameFinderCensus90NameStreamTest {
-
- private static ObjectStream<StringList> openData(String name)
- throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(
- NameFinderCensus90NameStreamTest.class,
- "/opennlp/tools/formats/" + name);
-
- return new NameFinderCensus90NameStream(in, StandardCharsets.UTF_8);
- }
+public class NameFinderCensus90NameStreamTest extends AbstractSampleStreamTest {
@Test
void testParsingEnglishSample() throws IOException {
- ObjectStream<StringList> sampleStream = openData("census90.sample");
+ ObjectStream<StringList> sampleStream = openData();
StringList personName = sampleStream.read();
@@ -100,4 +90,8 @@ public class NameFinderCensus90NameStreamTest {
Assertions.assertNull(personName);
}
+ private ObjectStream<StringList> openData() throws IOException {
+ return new NameFinderCensus90NameStream(getFactory("census90.sample"), StandardCharsets.UTF_8);
+ }
+
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
index 7e02fe79..4bda1c15 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
@@ -24,11 +24,11 @@ import opennlp.tools.util.InputStreamFactory;
public class ResourceAsStreamFactory implements InputStreamFactory {
- private Class<?> clazz;
- private String name;
+ private final Class<?> clazz;
+ private final String name;
public ResourceAsStreamFactory(Class<?> clazz, String name) {
- this.clazz = Objects.requireNonNull(clazz, "callz must not be null");
+ this.clazz = Objects.requireNonNull(clazz, "clazz must not be null");
this.name = Objects.requireNonNull(name, "name must not be null");
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
index 7435ec83..3b2cd9e9 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
@@ -19,25 +19,32 @@ package opennlp.tools.formats.ad;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import opennlp.tools.chunker.ChunkSample;
-import opennlp.tools.formats.ResourceAsStreamFactory;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.PlainTextByLineStream;
-public class ADChunkSampleStreamTest {
-
- private List<ChunkSample> samples = new ArrayList<>();
+public class ADChunkSampleStreamTest extends AbstractADSampleStreamTest<ChunkSample> {
+ @BeforeEach
+ void setup() throws IOException {
+ super.setup();
+ try (ADChunkSampleStream stream = new ADChunkSampleStream(
+ new PlainTextByLineStream(in, StandardCharsets.UTF_8))) {
+ ChunkSample sample;
+ while ((sample = stream.read()) != null) {
+ samples.add(sample);
+ }
+ Assertions.assertFalse(samples.isEmpty());
+ }
+ }
+
@Test
void testSimpleCount() {
- Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
+ Assertions.assertEquals(NUM_SENTENCES, samples.size());
}
@Test
@@ -64,18 +71,4 @@ public class ADChunkSampleStreamTest {
Assertions.assertEquals("B-NP", samples.get(3).getPreds()[0]);
}
- @BeforeEach
- void setup() throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(
- ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
-
- try (ADChunkSampleStream stream = new ADChunkSampleStream(new PlainTextByLineStream(in,
- StandardCharsets.UTF_8))) {
- ChunkSample sample;
- while ((sample = stream.read()) != null) {
- samples.add(sample);
- }
- }
- }
-
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
index 69db07ee..101b4652 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
@@ -19,26 +19,33 @@ package opennlp.tools.formats.ad;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
-public class ADNameSampleStreamTest {
+public class ADNameSampleStreamTest extends AbstractADSampleStreamTest<NameSample> {
- private List<NameSample> samples = new ArrayList<>();
+ @BeforeEach
+ void setup() throws IOException {
+ super.setup();
+
+ try (ADNameSampleStream stream = new ADNameSampleStream(
+ new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
+ NameSample sample;
+ while ((sample = stream.read()) != null) {
+ samples.add(sample);
+ }
+ }
+ }
@Test
void testSimpleCount() {
- Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
+ Assertions.assertEquals(NUM_SENTENCES, samples.size());
}
@Test
@@ -110,18 +117,4 @@ public class ADNameSampleStreamTest {
Assertions.assertEquals(new Span(5, 6, "person"), samples.get(7).getNames()[2]);
}
- @BeforeEach
- void setup() throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
- "/opennlp/tools/formats/ad.sample");
-
- try (ADNameSampleStream stream =
- new ADNameSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
- NameSample sample;
- while ((sample = stream.read()) != null) {
- samples.add(sample);
- }
- }
- }
-
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java
index f81f21e2..cbbae220 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java
@@ -21,20 +21,23 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.postag.POSSample;
import opennlp.tools.util.PlainTextByLineStream;
-public class ADPOSSampleStreamTest {
+public class ADPOSSampleStreamTest extends AbstractADSampleStreamTest<POSSample> {
+
+ @BeforeEach
+ void setup() throws IOException {
+ super.setup();
+ }
@Test
void testSimple() throws IOException {
// add one sentence with expandME = includeFeats = false
- try (ADPOSSampleStream stream = new ADPOSSampleStream(
- new PlainTextByLineStream(new ResourceAsStreamFactory(
- ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
+ try (ADPOSSampleStream stream = new ADPOSSampleStream(new PlainTextByLineStream(in,
StandardCharsets.UTF_8), false, false)) {
POSSample sample = stream.read();
@@ -57,9 +60,7 @@ public class ADPOSSampleStreamTest {
@Test
void testExpandME() throws IOException {
// add one sentence with expandME = true
- try (ADPOSSampleStream stream = new ADPOSSampleStream(
- new PlainTextByLineStream(new ResourceAsStreamFactory(
- ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
+ try (ADPOSSampleStream stream = new ADPOSSampleStream(new PlainTextByLineStream(in,
StandardCharsets.UTF_8), true, false)) {
POSSample sample = stream.read();
@@ -86,9 +87,7 @@ public class ADPOSSampleStreamTest {
@Test
void testIncludeFeats() throws IOException {
// add one sentence with includeFeats = true
- try (ADPOSSampleStream stream = new ADPOSSampleStream(
- new PlainTextByLineStream(new ResourceAsStreamFactory(
- ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
+ try (ADPOSSampleStream stream = new ADPOSSampleStream(new PlainTextByLineStream(in,
StandardCharsets.UTF_8), false, true)) {
POSSample sample = stream.read();
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
index 6e7554d1..1f55a494 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
@@ -21,22 +21,27 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
-import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.util.PlainTextByLineStream;
-public class ADParagraphStreamTest {
+public class ADParagraphStreamTest extends AbstractADSampleStreamTest<SentenceSample> {
- public static final int NUM_SENTENCES = 8;
+ private ADSentenceStream stream;
+
+ @BeforeEach
+ void setup() throws IOException {
+ super.setup();
+ stream = new ADSentenceStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
+ Assertions.assertNotNull(stream);
+ }
@Test
void testSimpleReading() throws IOException {
int count = 0;
- ADSentenceStream stream = openData();
-
ADSentenceStream.Sentence paragraph = stream.read();
paragraph.getRoot();
while (paragraph != null) {
@@ -52,8 +57,6 @@ public class ADParagraphStreamTest {
void testLeadingWithContraction() throws IOException {
int count = 0;
- ADSentenceStream stream = openData();
-
ADSentenceStream.Sentence paragraph = stream.read();
while (paragraph != null) {
@@ -63,11 +66,4 @@ public class ADParagraphStreamTest {
Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count);
}
-
- private static ADSentenceStream openData() throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
- "/opennlp/tools/formats/ad.sample");
-
- return new ADSentenceStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
- }
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java
index ce043f2c..58f45635 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java
@@ -19,22 +19,31 @@ package opennlp.tools.formats.ad;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
-public class ADSentenceSampleStreamTest {
+public class ADSentenceSampleStreamTest extends AbstractADSampleStreamTest<SentenceSample> {
- private List<SentenceSample> samples = new ArrayList<>();
+ @BeforeEach
+ void setup() throws IOException {
+ super.setup();
+
+ try (ADSentenceSampleStream stream = new ADSentenceSampleStream(
+ new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
+
+ SentenceSample sample;
+ while ((sample = stream.read()) != null) {
+ samples.add(sample);
+ }
+ Assertions.assertFalse(samples.isEmpty());
+ }
+ }
@Test
void testSimpleCount() {
@@ -50,22 +59,4 @@ public class ADSentenceSampleStreamTest {
Assertions.assertEquals(new Span(120, 180), samples.get(0).getSentences()[1]);
}
- @BeforeEach
- void setup() throws IOException {
- InputStreamFactory in = new ResourceAsStreamFactory(ADSentenceSampleStreamTest.class,
- "/opennlp/tools/formats/ad.sample");
-
- try (ADSentenceSampleStream stream = new ADSentenceSampleStream(
- new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
-
- SentenceSample sample;
-
- while ((sample = stream.read()) != null) {
- System.out.println(sample.getDocument());
- System.out.println("<fim>");
- samples.add(sample);
- }
- }
- }
-
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
index 309bd1eb..14039e48 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
@@ -19,10 +19,6 @@ package opennlp.tools.formats.ad;
import java.io.File;
import java.io.IOException;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
@@ -31,13 +27,11 @@ import org.junit.jupiter.api.Test;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.ObjectStream;
-public class ADTokenSampleStreamTest {
-
- private final List<TokenSample> samples = new ArrayList<>();
+public class ADTokenSampleStreamTest extends AbstractADSampleStreamTest<TokenSample> {
@Test
void testSimpleCount() {
- Assertions.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, samples.size());
+ Assertions.assertEquals(NUM_SENTENCES, samples.size());
}
@Test
@@ -46,25 +40,27 @@ public class ADTokenSampleStreamTest {
}
@BeforeEach
- void setup() throws IOException, URISyntaxException {
+ void setup() throws IOException {
+ super.setup();
+
ADTokenSampleStreamFactory<ADTokenSampleStreamFactory.Parameters> factory =
new ADTokenSampleStreamFactory<>(ADTokenSampleStreamFactory.Parameters.class);
- File dict = new File(Objects.requireNonNull(getClass().getClassLoader()
- .getResource("opennlp/tools/tokenize/latin-detokenizer.xml")).toURI());
- File data = new File(Objects.requireNonNull(getClass().getClassLoader()
- .getResource("opennlp/tools/formats/ad.sample")).toURI());
+ File data = new File(getResource("ad.sample").getFile());
+ Assertions.assertNotNull(data);
+ File dict = new File(getResourceWithoutPrefix("opennlp/tools/tokenize/latin-detokenizer.xml").getFile());
+ Assertions.assertNotNull(dict);
+
String[] args = {"-data", data.getCanonicalPath(), "-encoding", "UTF-8",
"-lang", "por", "-detokenizer", dict.getCanonicalPath()};
- ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
+ try (ObjectStream<TokenSample> tokenSampleStream = factory.create(args)) {
+ TokenSample sample = tokenSampleStream.read();
- TokenSample sample = tokenSampleStream.read();
-
- while (sample != null) {
- samples.add(sample);
- sample = tokenSampleStream.read();
+ while (sample != null) {
+ samples.add(sample);
+ sample = tokenSampleStream.read();
+ }
}
-
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/AbstractADSampleStreamTest.java
similarity index 55%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/ad/AbstractADSampleStreamTest.java
index 7e02fe79..025915a4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ResourceAsStreamFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/AbstractADSampleStreamTest.java
@@ -15,25 +15,28 @@
* limitations under the License.
*/
-package opennlp.tools.formats;
+package opennlp.tools.formats.ad;
-import java.io.InputStream;
-import java.util.Objects;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.jupiter.api.BeforeEach;
+
+import opennlp.tools.commons.Sample;
+import opennlp.tools.formats.AbstractFormatTest;
+import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.util.InputStreamFactory;
-public class ResourceAsStreamFactory implements InputStreamFactory {
+public abstract class AbstractADSampleStreamTest<T extends Sample> extends AbstractFormatTest {
+ protected static final int NUM_SENTENCES = 8;
- private Class<?> clazz;
- private String name;
+ protected final List<T> samples = new ArrayList<>();
- public ResourceAsStreamFactory(Class<?> clazz, String name) {
- this.clazz = Objects.requireNonNull(clazz, "callz must not be null");
- this.name = Objects.requireNonNull(name, "name must not be null");
- }
+ protected InputStreamFactory in;
- @Override
- public InputStream createInputStream() {
- return clazz.getResourceAsStream(name);
+ @BeforeEach
+ void setup() throws IOException {
+ in = new ResourceAsStreamFactory(AbstractADSampleStreamTest.class, FORMATS_BASE_DIR + "ad.sample");
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
similarity index 59%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
index 1aca7a9c..53237934 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
@@ -15,26 +15,31 @@
* limitations under the License.
*/
-package opennlp.tools.formats.muc;
+package opennlp.tools.formats.brat;
+import java.io.File;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
-public class SgmlParserTest {
+import opennlp.tools.formats.AbstractFormatTest;
- @Test
- void testParse1() throws IOException {
+public abstract class AbstractBratTest extends AbstractFormatTest {
- try (Reader in = new InputStreamReader(
- SgmlParserTest.class.getResourceAsStream("parsertest1.sgml"), StandardCharsets.UTF_8)) {
- SgmlParser parser = new SgmlParser();
- parser.parse(in, new SgmlParser.ContentHandler() {
- });
- }
+ protected File directory;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ directory = getBratDir();
+ Assertions.assertNotNull(directory);
+ }
+
+ private String getDirectoryAsString() {
+ return getResource("brat/").getFile();
}
+ protected File getBratDir() {
+ return new File(getDirectoryAsString());
+ }
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
index 80b9b3c8..d7887418 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
@@ -17,21 +17,19 @@
package opennlp.tools.formats.brat;
-import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
+import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.util.ObjectStream;
-public class BratAnnotationStreamTest {
+public class BratAnnotationStreamTest extends AbstractFormatTest {
private ObjectStream<BratAnnotation> creatBratAnnotationStream(
AnnotationConfiguration conf, String file) {
-
- InputStream in = BratAnnotationStreamTest.class.getResourceAsStream(file);
- return new BratAnnotationStream(conf, "testing", in);
+ return new BratAnnotationStream(conf, "testing", getResourceStream(file));
}
static void addEntityTypes(Map<String, String> typeToClassMap) {
@@ -49,7 +47,7 @@ public class BratAnnotationStreamTest {
AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
- "/opennlp/tools/formats/brat/voa-with-entities.ann");
+ "brat/voa-with-entities.ann");
// TODO: Test if we get the entities ... we expect!
@@ -68,7 +66,7 @@ public class BratAnnotationStreamTest {
AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
- "/opennlp/tools/formats/brat/voa-with-relations.ann");
+ "brat/voa-with-relations.ann");
// TODO: Test if we get the entities ... we expect!
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
index 59266b00..3e58f021 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
@@ -26,11 +26,12 @@ import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.NewlineSentenceDetector;
import opennlp.tools.tokenize.WhitespaceTokenizer;
-public class BratDocumentParserTest {
+public class BratDocumentParserTest extends AbstractFormatTest {
@Test
void testParse() throws IOException {
@@ -39,11 +40,8 @@ public class BratDocumentParserTest {
BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
- InputStream txtIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/opennlp-1193.txt");
-
- InputStream annIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/opennlp-1193.ann");
+ InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
+ InputStream annIn = getResourceStream("brat/opennlp-1193.ann");
BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
index 9d079074..eb0e371a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
@@ -25,7 +25,9 @@ import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class BratDocumentTest {
+import opennlp.tools.formats.AbstractFormatTest;
+
+public class BratDocumentTest extends AbstractFormatTest {
@Test
void testDocumentWithEntitiesParsing() throws IOException {
@@ -34,11 +36,8 @@ public class BratDocumentTest {
BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
- InputStream txtIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/voa-with-entities.txt");
-
- InputStream annIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/voa-with-entities.ann");
+ InputStream txtIn = getResourceStream("brat/voa-with-entities.txt");
+ InputStream annIn = getResourceStream("brat/voa-with-entities.ann");
BratDocument doc = BratDocument.parseDocument(config, "voa-with-entities", txtIn, annIn);
@@ -72,11 +71,8 @@ public class BratDocumentTest {
BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
- InputStream txtIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/opennlp-1193.txt");
-
- InputStream annIn = BratDocumentTest.class.getResourceAsStream(
- "/opennlp/tools/formats/brat/opennlp-1193.ann");
+ InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
+ InputStream annIn = getResourceStream("brat/opennlp-1193.ann");
BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
index fac516ae..68c82423 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
@@ -17,7 +17,6 @@
package opennlp.tools.formats.brat;
-import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.Collections;
@@ -33,7 +32,7 @@ import opennlp.tools.sentdetect.NewlineSentenceDetector;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
-public class BratNameSampleStreamTest {
+public class BratNameSampleStreamTest extends AbstractBratTest {
private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
Set<String> nameTypes) throws IOException {
@@ -41,11 +40,10 @@ public class BratNameSampleStreamTest {
BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
- File dir = new File(this.getClass().getResource("/opennlp/tools/formats/brat/").getFile());
FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
- ObjectStream<BratDocument> bratDocumentStream = new BratDocumentStream(config, dir,
- false, fileFilter);
+ ObjectStream<BratDocument> bratDocumentStream =
+ new BratDocumentStream(config, directory, false, fileFilter);
return new BratNameSampleStream(new NewlineSentenceDetector(),
WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/AbstractConlluSampleStreamTest.java
similarity index 58%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/conllu/AbstractConlluSampleStreamTest.java
index 1aca7a9c..cec1d16d 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/AbstractConlluSampleStreamTest.java
@@ -15,26 +15,23 @@
* limitations under the License.
*/
-package opennlp.tools.formats.muc;
+package opennlp.tools.formats.conllu;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
-import org.junit.jupiter.api.Test;
+import opennlp.tools.commons.Sample;
+import opennlp.tools.formats.AbstractFormatTest;
+import opennlp.tools.formats.ResourceAsStreamFactory;
-public class SgmlParserTest {
+public abstract class AbstractConlluSampleStreamTest<T extends Sample> extends AbstractFormatTest {
- @Test
- void testParse1() throws IOException {
+ protected final List<T> samples = new ArrayList<>();
- try (Reader in = new InputStreamReader(
- SgmlParserTest.class.getResourceAsStream("parsertest1.sgml"), StandardCharsets.UTF_8)) {
- SgmlParser parser = new SgmlParser();
- parser.parse(in, new SgmlParser.ContentHandler() {
- });
- }
+ protected ConlluStream getStream(String resource) throws IOException {
+ return new ConlluStream(new ResourceAsStreamFactory(
+ AbstractConlluSampleStreamTest.class, FORMATS_BASE_DIR + "conllu/" + resource));
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamTest.java
index 1ffc76c5..1b892897 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamTest.java
@@ -23,24 +23,20 @@ import java.io.IOException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.lemmatizer.LemmaSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
-public class ConlluLemmaSampleStreamTest {
+public class ConlluLemmaSampleStreamTest extends AbstractConlluSampleStreamTest<LemmaSample> {
@Test
void testParseSpanishS300() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
-
- try (ObjectStream<LemmaSample> stream = new ConlluLemmaSampleStream(
- new ConlluStream(streamFactory), ConlluTagset.U)) {
+ ConlluStream cStream = getStream("es-ud-sample.conllu");
+ Assertions.assertNotNull(cStream);
+
+ try (ObjectStream<LemmaSample> stream = new ConlluLemmaSampleStream(cStream, ConlluTagset.U)) {
LemmaSample predicted = stream.read();
- System.out.println(predicted);
Assertions.assertEquals("digám+tú+él", predicted.getLemmas()[0]);
Assertions.assertEquals("la", predicted.getTokens()[3]);
Assertions.assertEquals("el", predicted.getLemmas()[3]);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamTest.java
index ffff417b..48ac638e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamTest.java
@@ -23,19 +23,17 @@ import java.io.IOException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.postag.POSSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
-public class ConlluPOSSampleStreamTest {
+public class ConlluPOSSampleStreamTest extends AbstractConlluSampleStreamTest<POSSample> {
+
@Test
void testParseContraction() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "pt_br-ud-sample.conllu");
+ ConlluStream cStream = getStream("pt_br-ud-sample.conllu");
+ Assertions.assertNotNull(cStream);
- try (ObjectStream<POSSample> stream = new ConlluPOSSampleStream(
- new ConlluStream(streamFactory), ConlluTagset.U)) {
+ try (ObjectStream<POSSample> stream = new ConlluPOSSampleStream(cStream, ConlluTagset.U)) {
POSSample expected = POSSample.parse("Numa_ADP+DET reunião_NOUN entre_ADP " +
"representantes_NOUN da_ADP+DET Secretaria_PROPN da_ADP+DET Criança_PROPN do_ADP+DET " +
@@ -55,11 +53,10 @@ public class ConlluPOSSampleStreamTest {
@Test
void testParseSpanishS300() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
-
- try (ObjectStream<POSSample> stream = new ConlluPOSSampleStream(new ConlluStream(streamFactory),
- ConlluTagset.U)) {
+ ConlluStream cStream = getStream("es-ud-sample.conllu");
+ Assertions.assertNotNull(cStream);
+
+ try (ObjectStream<POSSample> stream = new ConlluPOSSampleStream(cStream, ConlluTagset.U)) {
POSSample expected1 = POSSample.parse(
"Digámoslo_VERB+PRON+PRON claramente_ADV ,_PUNCT la_DET insurgencia_NOUN se_PRON " +
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluSentenceSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluSentenceSampleStreamTest.java
index 67dff7e9..304574e0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluSentenceSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluSentenceSampleStreamTest.java
@@ -22,21 +22,18 @@ import java.io.IOException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
-public class ConlluSentenceSampleStreamTest {
+public class ConlluSentenceSampleStreamTest extends AbstractConlluSampleStreamTest<SentenceSample> {
@Test
void testParseTwoSentences() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
+ ConlluStream cStream = getStream("de-ud-train-sample.conllu");
+ Assertions.assertNotNull(cStream);
- try (ObjectStream<SentenceSample> stream =
- new ConlluSentenceSampleStream(new ConlluStream(streamFactory), 1)) {
+ try (ObjectStream<SentenceSample> stream = new ConlluSentenceSampleStream(cStream, 1)) {
SentenceSample sample1 = stream.read();
@@ -54,8 +51,10 @@ public class ConlluSentenceSampleStreamTest {
Assertions.assertNull(stream.read(), "Stream must be exhausted");
}
+ cStream = getStream("de-ud-train-sample.conllu");
+ Assertions.assertNotNull(cStream);
try (ObjectStream<SentenceSample> stream =
- new ConlluSentenceSampleStream(new ConlluStream(streamFactory), 3)) {
+ new ConlluSentenceSampleStream(cStream, 3)) {
SentenceSample sample = stream.read();
Assertions.assertEquals("Fachlich kompetent, sehr gute Beratung und ein freundliches Team."
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluStreamTest.java
index f42bce88..5545f321 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluStreamTest.java
@@ -27,19 +27,14 @@ import java.util.Optional;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
-import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.util.ObjectStream;
-public class ConlluStreamTest {
+public class ConlluStreamTest extends AbstractConlluSampleStreamTest<SentenceSample> {
@Test
void testParseTwoSentences() throws IOException {
-
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
-
- try (ObjectStream<ConlluSentence> stream = new ConlluStream(streamFactory)) {
+ try (ObjectStream<ConlluSentence> stream = getStream("de-ud-train-sample.conllu")) {
ConlluSentence sent1 = stream.read();
Assertions.assertEquals("train-s21", sent1.getSentenceIdComment());
@@ -61,10 +56,7 @@ public class ConlluStreamTest {
@Test
void testOptionalComments() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "full-sample.conllu");
-
- try (ObjectStream<ConlluSentence> stream = new ConlluStream(streamFactory)) {
+ try (ObjectStream<ConlluSentence> stream = getStream("full-sample.conllu")) {
ConlluSentence sent1 = stream.read();
Assertions.assertEquals("1", sent1.getSentenceIdComment());
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluTokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluTokenSampleStreamTest.java
index d6d96e32..9d69a0a3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluTokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluTokenSampleStreamTest.java
@@ -22,19 +22,16 @@ import java.io.IOException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.tokenize.TokenSample;
-import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
-public class ConlluTokenSampleStreamTest {
+public class ConlluTokenSampleStreamTest extends AbstractConlluSampleStreamTest<TokenSample> {
@Test
void testParseTwoSentences() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "de-ud-train-sample.conllu");
+ ConlluStream cStream = getStream("de-ud-train-sample.conllu");
- try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(new ConlluStream(streamFactory))) {
+ try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(cStream)) {
TokenSample expected1 = TokenSample.parse(
"Fachlich kompetent" + TokenSample.DEFAULT_SEPARATOR_CHARS
@@ -53,10 +50,9 @@ public class ConlluTokenSampleStreamTest {
@Test
void testParseContraction() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "pt_br-ud-sample.conllu");
+ ConlluStream cStream = getStream("pt_br-ud-sample.conllu");
- try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(new ConlluStream(streamFactory))) {
+ try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(cStream)) {
TokenSample expected1 = TokenSample.parse(
"Numa reunião entre representantes da Secretaria da Criança do DF " +
@@ -75,10 +71,9 @@ public class ConlluTokenSampleStreamTest {
@Test
void testParseSpanishS300() throws IOException {
- InputStreamFactory streamFactory =
- new ResourceAsStreamFactory(ConlluStreamTest.class, "es-ud-sample.conllu");
+ ConlluStream cStream = getStream("es-ud-sample.conllu");
- try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(new ConlluStream(streamFactory))) {
+ try (ObjectStream<TokenSample> stream = new ConlluTokenSampleStream(cStream)) {
TokenSample expected1 = TokenSample.parse(
"Digámoslo claramente" + TokenSample.DEFAULT_SEPARATOR_CHARS +
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
index 94ab5a0a..42ef28fb 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
@@ -22,15 +22,17 @@ import java.io.IOException;
import java.io.InputStream;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.parser.Parse;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
-public class ConstitParseSampleStreamTest {
+public class ConstitParseSampleStreamTest extends AbstractFormatTest {
- private String[] sample1Tokens = new String[] {
+ private final String[] sample1Tokens = new String[] {
"L'",
"autonomie",
"de",
@@ -80,30 +82,27 @@ public class ConstitParseSampleStreamTest {
"."
};
- /**
- * Reads sample1.xml into a byte array.
- *
- * @return byte array containing sample1.xml.
- */
- private static byte[] getSample1() throws IOException {
+ private ObjectStream<byte[]> sample;
+
+ @BeforeEach
+ public void setup() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int length;
- try (InputStream sampleIn =
- ConstitParseSampleStreamTest.class.getResourceAsStream("sample1.xml")) {
+ try (InputStream sampleIn = getResourceStream("frenchtreebank/sample1.xml")) {
while ((length = sampleIn.read(buffer)) > 0) {
out.write(buffer, 0, length);
}
}
- return out.toByteArray();
+ sample = ObjectStreamUtils.createObjectStream(out.toByteArray());
+ Assertions.assertNotNull(sample);
}
@Test
void testThereIsExactlyOneSent() throws IOException {
- try (ObjectStream<Parse> samples =
- new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
+ try (ObjectStream<Parse> samples = new ConstitParseSampleStream(sample)) {
Assertions.assertNotNull(samples.read());
Assertions.assertNull(samples.read());
Assertions.assertNull(samples.read());
@@ -113,8 +112,7 @@ public class ConstitParseSampleStreamTest {
@Test
void testTokensAreCorrect() throws IOException {
- try (ObjectStream<Parse> samples =
- new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
+ try (ObjectStream<Parse> samples = new ConstitParseSampleStream(sample)) {
Parse p = samples.read();
Parse[] tagNodes = p.getTagNodes();
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocumentTest.java
index 71ead885..b0f111e0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocumentTest.java
@@ -24,15 +24,15 @@ import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.Span;
-public class IrishSentenceBankDocumentTest {
+public class IrishSentenceBankDocumentTest extends AbstractFormatTest {
@Test
void testParsingSimpleDoc() throws IOException {
- try (InputStream irishSBXmlIn =
- IrishSentenceBankDocumentTest.class.getResourceAsStream("irishsentencebank-sample.xml")) {
+ try (InputStream irishSBXmlIn = getResourceStream("irishsentencebank/irishsentencebank-sample.xml")) {
IrishSentenceBankDocument doc = IrishSentenceBankDocument.parse(irishSBXmlIn);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/letsmt/LetsmtDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/letsmt/LetsmtDocumentTest.java
index e73c942b..138601e8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/letsmt/LetsmtDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/letsmt/LetsmtDocumentTest.java
@@ -24,11 +24,13 @@ import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class LetsmtDocumentTest {
+import opennlp.tools.formats.AbstractFormatTest;
+
+public class LetsmtDocumentTest extends AbstractFormatTest {
@Test
void testParsingSimpleDoc() throws IOException {
- try (InputStream letsmtXmlIn = LetsmtDocumentTest.class.getResourceAsStream("letsmt-with-words.xml");) {
+ try (InputStream letsmtXmlIn = getResourceStream("letsmt/letsmt-with-words.xml");) {
LetsmtDocument doc = LetsmtDocument.parse(letsmtXmlIn);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/AbstractMascSampleStreamTest.java
similarity index 58%
copy from opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
copy to opennlp-tools/src/test/java/opennlp/tools/formats/masc/AbstractMascSampleStreamTest.java
index 1aca7a9c..08bdb97e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/AbstractMascSampleStreamTest.java
@@ -15,26 +15,31 @@
* limitations under the License.
*/
-package opennlp.tools.formats.muc;
+package opennlp.tools.formats.masc;
+import java.io.File;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
-public class SgmlParserTest {
+import opennlp.tools.formats.AbstractFormatTest;
- @Test
- void testParse1() throws IOException {
+public abstract class AbstractMascSampleStreamTest extends AbstractFormatTest {
- try (Reader in = new InputStreamReader(
- SgmlParserTest.class.getResourceAsStream("parsertest1.sgml"), StandardCharsets.UTF_8)) {
- SgmlParser parser = new SgmlParser();
- parser.parse(in, new SgmlParser.ContentHandler() {
- });
- }
+ protected File directory;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ directory = getMascDir();
+ Assertions.assertNotNull(directory);
+ }
+
+ private String getDirectoryAsString() {
+ return getResource("masc/").getFile();
}
+ protected File getMascDir() {
+ return new File(getDirectoryAsString());
+ }
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamTest.java
index 5dea461a..50211b3f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamTest.java
@@ -17,11 +17,11 @@
package opennlp.tools.formats.masc;
-import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import opennlp.tools.namefind.NameFinderME;
@@ -33,18 +33,22 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
-public class MascNamedEntitySampleStreamTest {
+public class MascNamedEntitySampleStreamTest extends AbstractMascSampleStreamTest {
+
+ private MascNamedEntitySampleStream stream;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
+ stream = new MascNamedEntitySampleStream(
+ new MascDocumentStream(directory, true, fileFilter));
+ Assertions.assertNotNull(stream);
+ }
@Test
void read() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascNamedEntitySampleStream stream;
- stream = new MascNamedEntitySampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
NameSample s = stream.read();
String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
@@ -54,9 +58,7 @@ public class MascNamedEntitySampleStreamTest {
Span[] returnedTags = s.getNames();
// check the start/end positions
Assertions.assertEquals(expectedTags.length, returnedTags.length);
- for (int i = 0; i < returnedTags.length; i++) {
- Assertions.assertTrue(expectedTags[i].equals(returnedTags[i]));
- }
+ Assertions.assertArrayEquals(expectedTags, returnedTags);
s = stream.read();
expectedTokens = new String[] {"This", "is", "'nother", "test", "sentence", "."};
@@ -74,13 +76,6 @@ public class MascNamedEntitySampleStreamTest {
@Test
void close() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascNamedEntitySampleStream stream;
- stream = new MascNamedEntitySampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
stream.close();
NameSample s = stream.read();
} catch (IOException e) {
@@ -93,13 +88,6 @@ public class MascNamedEntitySampleStreamTest {
@Test
void reset() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascNamedEntitySampleStream stream;
- stream = new MascNamedEntitySampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
NameSample s = stream.read();
s = stream.read();
s = stream.read();
@@ -115,9 +103,7 @@ public class MascNamedEntitySampleStreamTest {
Span[] returnedTags = s.getNames();
// check the start/end positions
Assertions.assertEquals(expectedTags.length, returnedTags.length);
- for (int i = 0; i < returnedTags.length; i++) {
- Assertions.assertTrue(expectedTags[i].equals(returnedTags[i]));
- }
+ Assertions.assertArrayEquals(expectedTags, returnedTags);
} catch (IOException e) {
Assertions.fail("IO Exception: " + e.getMessage());
@@ -127,19 +113,15 @@ public class MascNamedEntitySampleStreamTest {
@Test
void train() {
try {
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
FileFilter fileFilter = pathname -> pathname.getName().contains("");
ObjectStream<NameSample> trainSample = new MascNamedEntitySampleStream(
- new MascDocumentStream(directory,
- true, fileFilter));
+ new MascDocumentStream(directory, true, fileFilter));
System.out.println("Training");
- TokenNameFinderModel model = null;
TrainingParameters trainingParameters = new TrainingParameters();
trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, 100);
- model = NameFinderME.train("en", null, trainSample,
+ TokenNameFinderModel model = NameFinderME.train("en", null, trainSample,
trainingParameters, new TokenNameFinderFactory());
ObjectStream<NameSample> testNames = new MascNamedEntitySampleStream(
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascPOSSampleStreamTest.java
index a38032f4..1a0a7f4b 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascPOSSampleStreamTest.java
@@ -17,12 +17,12 @@
package opennlp.tools.formats.masc;
-import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.Arrays;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import opennlp.tools.postag.POSEvaluator;
@@ -33,18 +33,22 @@ import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
-public class MascPOSSampleStreamTest {
+public class MascPOSSampleStreamTest extends AbstractMascSampleStreamTest {
+
+ private MascPOSSampleStream stream;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
+ stream = new MascPOSSampleStream(
+ new MascDocumentStream(directory, true, fileFilter));
+ Assertions.assertNotNull(stream);
+ }
@Test
void read() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascPOSSampleStream stream;
- stream = new MascPOSSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
POSSample s = stream.read();
String[] expectedTokens = {"This", "is", "a", "test", "Sentence", "."};
@@ -67,13 +71,6 @@ public class MascPOSSampleStreamTest {
@Test
void close() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascPOSSampleStream stream;
- stream = new MascPOSSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
stream.close();
POSSample s = stream.read();
} catch (IOException e) {
@@ -86,13 +83,6 @@ public class MascPOSSampleStreamTest {
@Test
void reset() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascPOSSampleStream stream;
- stream = new MascPOSSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
POSSample s = stream.read();
s = stream.read();
s = stream.read();
@@ -116,12 +106,9 @@ public class MascPOSSampleStreamTest {
@Test
void train() {
try {
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
FileFilter fileFilter = pathname -> pathname.getName().contains("");
ObjectStream<POSSample> trainPOS = new MascPOSSampleStream(
- new MascDocumentStream(directory,
- true, fileFilter));
+ new MascDocumentStream(directory, true, fileFilter));
System.out.println("Training");
POSModel model = null;
@@ -131,8 +118,8 @@ public class MascPOSSampleStreamTest {
model = POSTaggerME.train("en", trainPOS,
trainingParameters, new POSTaggerFactory());
- ObjectStream<POSSample> testPOS = new MascPOSSampleStream(new MascDocumentStream(directory,
- true, fileFilter));
+ ObjectStream<POSSample> testPOS = new MascPOSSampleStream(
+ new MascDocumentStream(directory, true, fileFilter));
POSEvaluator evaluator = new POSEvaluator(new POSTaggerME(model));
evaluator.evaluate(testPOS);
System.out.println("Accuracy: " + evaluator.getWordAccuracy());
@@ -144,8 +131,6 @@ public class MascPOSSampleStreamTest {
Assertions.fail("Exception raised");
}
-
}
-
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascSentenceSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascSentenceSampleStreamTest.java
index 4b0ea41b..d74afa97 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascSentenceSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascSentenceSampleStreamTest.java
@@ -17,7 +17,6 @@
package opennlp.tools.formats.masc;
-import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
@@ -25,6 +24,7 @@ import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@@ -37,17 +37,21 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
-public class MascSentenceSampleStreamTest {
+public class MascSentenceSampleStreamTest extends AbstractMascSampleStreamTest {
+ private MascSentenceSampleStream stream;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
+ stream = new MascSentenceSampleStream(
+ new MascDocumentStream(directory, true, fileFilter), 2);
+ Assertions.assertNotNull(stream);
+ }
@Test
void reset() {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
try {
- MascSentenceSampleStream stream = new MascSentenceSampleStream(
- new MascDocumentStream(directory, true, fileFilter), 2);
-
//exhaust the fake file
SentenceSample testSample = stream.read();
@@ -65,7 +69,7 @@ public class MascSentenceSampleStreamTest {
sentenceSpans.add(new Span(0, 24));
sentenceSpans.add(new Span(25, 55));
SentenceSample expectedSample = new SentenceSample(documentText,
- sentenceSpans.toArray(new Span[sentenceSpans.size()]));
+ sentenceSpans.toArray(new Span[0]));
Assertions.assertEquals(testSample.toString(), expectedSample.toString());
@@ -78,12 +82,6 @@ public class MascSentenceSampleStreamTest {
void close() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascSentenceSampleStream stream;
- stream = new MascSentenceSampleStream(
- new MascDocumentStream(directory, true, fileFilter), 2);
stream.close();
stream.read();
} catch (IOException e) {
@@ -96,7 +94,6 @@ public class MascSentenceSampleStreamTest {
@Test
void read() {
FileFilter fileFilter = pathname -> pathname.getName().contains("");
- File directory = new File(this.getClass().getResource("/opennlp/tools/formats/masc").getFile());
try {
MascSentenceSampleStream stream = new MascSentenceSampleStream(
new MascDocumentStream(directory, true, fileFilter), 2);
@@ -107,7 +104,7 @@ public class MascSentenceSampleStreamTest {
sentenceSpans.add(new Span(25, 55));
SentenceSample expectedSample = new SentenceSample(documentText,
- sentenceSpans.toArray(new Span[sentenceSpans.size()]));
+ sentenceSpans.toArray(new Span[0]));
SentenceSample testSample = stream.read();
Assertions.assertEquals(testSample.toString(), expectedSample.toString());
@@ -123,12 +120,10 @@ public class MascSentenceSampleStreamTest {
}
- @Disabled //todo: We can't train on the FakeMasc data, it is too small.
@Test
+ @Disabled //todo: We can't train on the FakeMasc data, it is too small.
void train() {
try {
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
FileFilter fileFilter = pathname -> pathname.getName().contains("");
ObjectStream<SentenceSample> trainSentences = new MascSentenceSampleStream(
new MascDocumentStream(directory,
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascTokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascTokenSampleStreamTest.java
index d11c7d78..5e3a1eee 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascTokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/masc/MascTokenSampleStreamTest.java
@@ -17,12 +17,12 @@
package opennlp.tools.formats.masc;
-import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.Arrays;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import opennlp.tools.tokenize.TokenSample;
@@ -34,18 +34,22 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
-public class MascTokenSampleStreamTest {
+public class MascTokenSampleStreamTest extends AbstractMascSampleStreamTest {
+
+ private MascTokenSampleStream stream;
+
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
+ stream = new MascTokenSampleStream(
+ new MascDocumentStream(directory, true, fileFilter));
+ Assertions.assertNotNull(stream);
+ }
@Test
void read() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascTokenSampleStream stream;
- stream = new MascTokenSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
TokenSample s = stream.read();
String expectedString = "This is a test Sentence.";
@@ -80,13 +84,6 @@ public class MascTokenSampleStreamTest {
@Test
void close() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascTokenSampleStream stream;
- stream = new MascTokenSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
stream.close();
TokenSample s = stream.read();
} catch (IOException e) {
@@ -99,13 +96,6 @@ public class MascTokenSampleStreamTest {
@Test
void reset() {
try {
- FileFilter fileFilter = pathname -> pathname.getName().contains("MASC");
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
- MascTokenSampleStream stream;
- stream = new MascTokenSampleStream(
- new MascDocumentStream(directory, true, fileFilter));
-
TokenSample s = stream.read();
s = stream.read();
s = stream.read();
@@ -136,12 +126,9 @@ public class MascTokenSampleStreamTest {
@Test
void train() {
try {
- File directory = new File(this.getClass().getResource(
- "/opennlp/tools/formats/masc/").getFile());
FileFilter fileFilter = pathname -> pathname.getName().contains("");
ObjectStream<TokenSample> trainTokens = new MascTokenSampleStream(
- new MascDocumentStream(directory,
- true, fileFilter));
+ new MascDocumentStream(directory, true, fileFilter));
System.out.println("Training");
TokenizerModel model = null;
@@ -152,8 +139,7 @@ public class MascTokenSampleStreamTest {
trainingParameters);
ObjectStream<TokenSample> testTokens = new MascTokenSampleStream(
- new MascDocumentStream(directory,
- true, fileFilter));
+ new MascDocumentStream(directory, true, fileFilter));
TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model));
evaluator.evaluate(testTokens);
System.out.println(evaluator.getFMeasure());
@@ -164,8 +150,6 @@ public class MascTokenSampleStreamTest {
Assertions.fail("Exception raised");
}
-
}
-
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
index 1aca7a9c..486da1e0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/muc/SgmlParserTest.java
@@ -24,13 +24,15 @@ import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Test;
-public class SgmlParserTest {
+import opennlp.tools.formats.AbstractFormatTest;
+
+public class SgmlParserTest extends AbstractFormatTest {
@Test
void testParse1() throws IOException {
- try (Reader in = new InputStreamReader(
- SgmlParserTest.class.getResourceAsStream("parsertest1.sgml"), StandardCharsets.UTF_8)) {
+ try (Reader in = new InputStreamReader(getResourceStream("muc/parsertest1.sgml"),
+ StandardCharsets.UTF_8)) {
SgmlParser parser = new SgmlParser();
parser.parse(in, new SgmlParser.ContentHandler() {
});
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocumentTest.java
index 43debb94..25fdb9d8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocumentTest.java
@@ -23,16 +23,16 @@ import java.io.InputStream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class NKJPSegmentationDocumentTest {
+import opennlp.tools.formats.AbstractFormatTest;
+
+public class NKJPSegmentationDocumentTest extends AbstractFormatTest {
@Test
void testParsingSimpleDoc() throws IOException {
- try (InputStream nkjpSegXmlIn =
- NKJPSegmentationDocumentTest.class.getResourceAsStream("ann_segmentation.xml")) {
+ try (InputStream nkjpSegXmlIn = getResourceStream("nkjp/ann_segmentation.xml")) {
NKJPSegmentationDocument doc = NKJPSegmentationDocument.parse(nkjpSegXmlIn);
Assertions.assertEquals(1, doc.getSegments().size());
-
Assertions.assertEquals(7, doc.getSegments().get("segm_1.1-s").size());
String src = "To krótkie zdanie w drugim akapicie.";
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPTextDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPTextDocumentTest.java
index efe0f420..0f0d0b82 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPTextDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/nkjp/NKJPTextDocumentTest.java
@@ -23,17 +23,17 @@ import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class NKJPTextDocumentTest {
+import opennlp.tools.formats.AbstractFormatTest;
+
+public class NKJPTextDocumentTest extends AbstractFormatTest {
@Test
void testParsingSimpleDoc() throws Exception {
- try (InputStream nkjpTextXmlIn =
- NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
+ try (InputStream nkjpTextXmlIn = getResourceStream("nkjp/text_structure.xml")) {
NKJPTextDocument doc = NKJPTextDocument.parse(nkjpTextXmlIn);
Assertions.assertEquals(1, doc.getDivtypes().size());
Assertions.assertEquals("article", doc.getDivtypes().get("div-1"));
-
Assertions.assertEquals(1, doc.getTexts().size());
Assertions.assertEquals(1, doc.getTexts().get("text-1").size());
Assertions.assertEquals(2, doc.getTexts().get("text-1").get("div-1").size());
@@ -45,8 +45,7 @@ public class NKJPTextDocumentTest {
@Test
void testGetParagraphs() throws Exception {
- try (InputStream nkjpTextXmlIn =
- NKJPTextDocumentTest.class.getResourceAsStream("text_structure.xml")) {
+ try (InputStream nkjpTextXmlIn = getResourceStream("nkjp/text_structure.xml")) {
NKJPTextDocument doc = NKJPTextDocument.parse(nkjpTextXmlIn);
Map<String, String> paras = doc.getParagraphs();