You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/05/19 07:08:43 UTC
[opennlp] 01/01: OPENNLP-1494 Improve resource handling of AutoCloseable streams in several classes
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch OPENNLP-1494_Improve_resource_handling_of_AutoClosable_streams_in_several_classes
in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 9cfe4a0b6636b5310f3d23e7fc2265e68e29a884
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Fri May 19 09:08:32 2023 +0200
OPENNLP-1494 Improve resource handling of AutoCloseable streams in several classes
---
.../tools/cmdline/chunker/ChunkerMETool.java | 6 +-
.../tools/cmdline/lemmatizer/LemmatizerMETool.java | 11 +--
.../cmdline/namefind/TokenNameFinderTool.java | 9 +-
.../opennlp/tools/cmdline/parser/ParserTool.java | 7 +-
.../tools/cmdline/postag/POSTaggerTool.java | 7 +-
.../tools/formats/masc/MascDocumentStream.java | 11 ++-
.../opennlp/tools/parser/ParserCrossValidator.java | 32 +++----
.../formats/NameFinderCensus90NameStreamTest.java | 106 ++++++++++-----------
.../leipzig/LeipzigLanguageSampleStreamTest.java | 23 +++--
.../tools/parser/ParseSampleStreamTest.java | 13 +--
10 files changed, 108 insertions(+), 117 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
index 7b43396a..e9fdbe30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
@@ -59,12 +59,10 @@ public class ChunkerMETool extends BasicCmdLineTool {
ChunkerME chunker = new ChunkerME(model);
- ObjectStream<String> lineStream;
PerformanceMonitor perfMon = null;
- try {
- lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
- SystemInputStreamFactory.encoding());
+ try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
perfMon = new PerformanceMonitor("sent");
perfMon.start();
String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index 71f4f692..49891b71 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -54,17 +54,14 @@ public class LemmatizerMETool extends BasicCmdLineTool {
if (args.length != 1) {
logger.info(getHelp());
} else {
- LemmatizerModel model = new LemmatizerModelLoader()
- .load(new File(args[0]));
-
+ LemmatizerModel model = new LemmatizerModelLoader().load(new File(args[0]));
LemmatizerME lemmatizer = new LemmatizerME(model);
- ObjectStream<String> lineStream;
PerformanceMonitor perfMon = null;
- try {
- lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
- SystemInputStreamFactory.encoding());
+ try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
perfMon = new PerformanceMonitor("sent");
perfMon.start();
String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 15f21ce6..ca40f3b2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -68,15 +68,12 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
nameFinders[i] = new NameFinderME(model);
}
- // ObjectStream<String> untokenizedLineStream =
- // new PlainTextByLineStream(new InputStreamReader(System.in));
- ObjectStream<String> untokenizedLineStream;
PerformanceMonitor perfMon = new PerformanceMonitor("sent");
perfMon.start();
- try {
- untokenizedLineStream = new PlainTextByLineStream(
- new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
+ try (ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
String line;
while ((line = untokenizedLineStream.read()) != null) {
String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
index 05c55c2e..90ba1f44 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
@@ -136,11 +136,10 @@ public final class ParserTool extends BasicCmdLineTool {
Parser parser = ParserFactory.create(model, beamSize, advancePercentage);
- ObjectStream<String> lineStream;
PerformanceMonitor perfMon = null;
- try {
- lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
- SystemInputStreamFactory.encoding());
+ try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
perfMon = new PerformanceMonitor("sent");
perfMon.start();
String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
index 2718ddf4..d5e61a1b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
@@ -60,12 +60,11 @@ public final class POSTaggerTool extends BasicCmdLineTool {
POSTaggerME tagger = new POSTaggerME(model);
- ObjectStream<String> lineStream;
PerformanceMonitor perfMon = null;
- try {
- lineStream =
- new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
+ try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
perfMon = new PerformanceMonitor("sent");
perfMon.start();
String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
index afe02a2f..ffd35c70 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
@@ -86,13 +86,12 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
}
}
- private List<MascDocument> documents = new LinkedList<>();
+ private final List<MascDocument> documents = new LinkedList<>();
private Iterator<MascDocument> documentIterator;
- private SAXParser saxParser;
+ private final SAXParser saxParser;
public MascDocumentStream(File mascCorpusDirectory) throws IOException {
- FileFilter fileFilter = pathname -> pathname.getName().contains("");
- new MascDocumentStream(mascCorpusDirectory, true, fileFilter);
+ this(mascCorpusDirectory, true, pathname -> pathname.getName().contains(""));
}
/**
@@ -200,6 +199,7 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
* Reset the reading of all documents to the first sentence.
* Reset the corpus to the first document.
*/
+ @Override
public void reset() {
for (MascDocument doc : documents) {
doc.reset();
@@ -213,6 +213,7 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
* @return A corpus document with all its annotations.
* @throws IOException if anything goes wrong.
*/
+ @Override
public MascDocument read() throws IOException {
MascDocument doc = null;
@@ -227,8 +228,8 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
/**
* Remove the corpus from the memory.
*/
+ @Override
public void close() {
- documents = null;
documentIterator = null;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
index c3577e33..6ec7e4de 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
@@ -73,23 +73,23 @@ public class ParserCrossValidator {
CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);
while (partitioner.hasNext()) {
- CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();
-
- ParserModel model;
- if (ParserType.CHUNKING.equals(parserType)) {
- model = opennlp.tools.parser.chunking.Parser.train(languageCode, samples, rules, params);
- }
- else if (ParserType.TREEINSERT.equals(parserType)) {
- model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, samples, rules, params);
- }
- else {
- throw new IllegalStateException("Unexpected parser type: " + parserType);
+ try (CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next()) {
+ ParserModel model;
+ if (ParserType.CHUNKING.equals(parserType)) {
+ model = opennlp.tools.parser.chunking.Parser.train(languageCode, samples, rules, params);
+ }
+ else if (ParserType.TREEINSERT.equals(parserType)) {
+ model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, samples, rules, params);
+ }
+ else {
+ throw new IllegalStateException("Unexpected parser type: " + parserType);
+ }
+
+ ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
+ evaluator.evaluate(trainingSampleStream.getTestSampleStream());
+
+ fmeasure.mergeInto(evaluator.getFMeasure());
}
-
- ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
- evaluator.evaluate(trainingSampleStream.getTestSampleStream());
-
- fmeasure.mergeInto(evaluator.getFMeasure());
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
index 8e04b929..25e57e4e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
@@ -31,63 +31,63 @@ public class NameFinderCensus90NameStreamTest extends AbstractSampleStreamTest {
@Test
void testParsingEnglishSample() throws IOException {
- ObjectStream<StringList> sampleStream = openData();
+ try (ObjectStream<StringList> sampleStream = openData()) {
+ StringList personName = sampleStream.read();
- StringList personName = sampleStream.read();
+ // verify the first 5 taken from the Surname data
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Smith", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Johnson", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Williams", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Jones", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Brown", personName.getToken(0));
- // verify the first 5 taken from the Surname data
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Smith", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Johnson", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Williams", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Jones", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Brown", personName.getToken(0));
+ // verify the next 5 taken from the female names
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Mary", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Patricia", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Linda", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Barbara", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Elizabeth", personName.getToken(0));
- // verify the next 5 taken from the female names
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Mary", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Patricia", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Linda", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Barbara", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Elizabeth", personName.getToken(0));
+ // verify the last 5 taken from the male names
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("James", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("John", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Robert", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("Michael", personName.getToken(0));
+ personName = sampleStream.read();
+ Assertions.assertNotNull(personName);
+ Assertions.assertEquals("William", personName.getToken(0));
- // verify the last 5 taken from the male names
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("James", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("John", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Robert", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("Michael", personName.getToken(0));
- personName = sampleStream.read();
- Assertions.assertNotNull(personName);
- Assertions.assertEquals("William", personName.getToken(0));
-
- // verify the end of the file.
- personName = sampleStream.read();
- Assertions.assertNull(personName);
+ // verify the end of the file.
+ personName = sampleStream.read();
+ Assertions.assertNull(personName);
+ }
}
private ObjectStream<StringList> openData() throws IOException {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
index 3bbc33c9..a9428bb5 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
@@ -40,12 +40,11 @@ public class LeipzigLanguageSampleStreamTest {
@Test
void testReadSentenceFiles() {
-
int samplesPerLanguage = 2;
int sentencesPerSample = 1;
- try {
- LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(new File(testDataPath),
- sentencesPerSample, samplesPerLanguage);
+ try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(new File(testDataPath),
+ sentencesPerSample, samplesPerLanguage)) {
+
int count = 0;
while (stream.read() != null) {
count++;
@@ -64,22 +63,22 @@ public class LeipzigLanguageSampleStreamTest {
int samplesPerLanguage = 2;
int sentencesPerSample = 2;
- LeipzigLanguageSampleStream stream =
- new LeipzigLanguageSampleStream(new File(testDataPath),
- sentencesPerSample, samplesPerLanguage);
- while (stream.read() != null) ;
+ try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(
+ new File(testDataPath), sentencesPerSample, samplesPerLanguage)) {
+
+ while (stream.read() != null) ;
+ }
});
}
@Test
void testReadSentenceFilesWithEmptyDir() {
-
int samplesPerLanguage = 2;
int sentencesPerSample = 1;
- try {
- LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(emptyTempDir,
- sentencesPerSample, samplesPerLanguage);
+ try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(
+ emptyTempDir, sentencesPerSample, samplesPerLanguage)) {
+
int count = 0;
while (stream.read() != null) {
count++;
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
index bd7b97e7..0e8649d9 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
@@ -40,11 +40,12 @@ public class ParseSampleStreamTest {
@Test
void testReadTestStream() throws IOException {
- ObjectStream<Parse> parseStream = createParseSampleStream();
- Assertions.assertNotNull(parseStream.read());
- Assertions.assertNotNull(parseStream.read());
- Assertions.assertNotNull(parseStream.read());
- Assertions.assertNotNull(parseStream.read());
- Assertions.assertNull(parseStream.read());
+ try (ObjectStream<Parse> parseStream = createParseSampleStream()) {
+ Assertions.assertNotNull(parseStream.read());
+ Assertions.assertNotNull(parseStream.read());
+ Assertions.assertNotNull(parseStream.read());
+ Assertions.assertNotNull(parseStream.read());
+ Assertions.assertNull(parseStream.read());
+ }
}
}