You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/05/19 07:08:43 UTC
[opennlp] 01/01: OPENNLP-1494 Improve resource handling of AutoCloseable streams in several classes

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1494_Improve_resource_handling_of_AutoClosable_streams_in_several_classes
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 9cfe4a0b6636b5310f3d23e7fc2265e68e29a884
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Fri May 19 09:08:32 2023 +0200

    OPENNLP-1494 Improve resource handling of AutoCloseable streams in several classes
---
 .../tools/cmdline/chunker/ChunkerMETool.java       |   6 +-
 .../tools/cmdline/lemmatizer/LemmatizerMETool.java |  11 +--
 .../cmdline/namefind/TokenNameFinderTool.java      |   9 +-
 .../opennlp/tools/cmdline/parser/ParserTool.java   |   7 +-
 .../tools/cmdline/postag/POSTaggerTool.java        |   7 +-
 .../tools/formats/masc/MascDocumentStream.java     |  11 ++-
 .../opennlp/tools/parser/ParserCrossValidator.java |  32 +++----
 .../formats/NameFinderCensus90NameStreamTest.java  | 106 ++++++++++-----------
 .../leipzig/LeipzigLanguageSampleStreamTest.java   |  23 +++--
 .../tools/parser/ParseSampleStreamTest.java        |  13 +--
 10 files changed, 108 insertions(+), 117 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
index 7b43396a..e9fdbe30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
@@ -59,12 +59,10 @@ public class ChunkerMETool extends BasicCmdLineTool {
 
       ChunkerME chunker = new ChunkerME(model);
 
-      ObjectStream<String> lineStream;
       PerformanceMonitor perfMon = null;
 
-      try {
-        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
-            SystemInputStreamFactory.encoding());
+      try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+              new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
         perfMon = new PerformanceMonitor("sent");
         perfMon.start();
         String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index 71f4f692..49891b71 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -54,17 +54,14 @@ public class LemmatizerMETool extends BasicCmdLineTool {
     if (args.length != 1) {
       logger.info(getHelp());
     } else {
-      LemmatizerModel model = new LemmatizerModelLoader()
-          .load(new File(args[0]));
-
+      LemmatizerModel model = new LemmatizerModelLoader().load(new File(args[0]));
       LemmatizerME lemmatizer = new LemmatizerME(model);
 
-      ObjectStream<String> lineStream;
       PerformanceMonitor perfMon = null;
 
-      try {
-        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
-            SystemInputStreamFactory.encoding());
+      try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+              new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
         perfMon = new PerformanceMonitor("sent");
         perfMon.start();
         String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 15f21ce6..ca40f3b2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -68,15 +68,12 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
         nameFinders[i] = new NameFinderME(model);
       }
 
-      // ObjectStream<String> untokenizedLineStream =
-      // new PlainTextByLineStream(new InputStreamReader(System.in));
-      ObjectStream<String> untokenizedLineStream;
       PerformanceMonitor perfMon = new PerformanceMonitor("sent");
       perfMon.start();
 
-      try {
-        untokenizedLineStream = new PlainTextByLineStream(
-                new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
+      try (ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
+              new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
         String line;
         while ((line = untokenizedLineStream.read()) != null) {
           String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
index 05c55c2e..90ba1f44 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
@@ -136,11 +136,10 @@ public final class ParserTool extends BasicCmdLineTool {
 
       Parser parser = ParserFactory.create(model, beamSize, advancePercentage);
 
-      ObjectStream<String> lineStream;
       PerformanceMonitor perfMon = null;
-      try {
-        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
-            SystemInputStreamFactory.encoding());
+      try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+              new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+        
         perfMon = new PerformanceMonitor("sent");
         perfMon.start();
         String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
index 2718ddf4..d5e61a1b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
@@ -60,12 +60,11 @@ public final class POSTaggerTool extends BasicCmdLineTool {
 
       POSTaggerME tagger = new POSTaggerME(model);
 
-      ObjectStream<String> lineStream;
       PerformanceMonitor perfMon = null;
 
-      try {
-        lineStream =
-            new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
+      try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+              new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {
+
         perfMon = new PerformanceMonitor("sent");
         perfMon.start();
         String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
index afe02a2f..ffd35c70 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
@@ -86,13 +86,12 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
     }
 
   }
-  private List<MascDocument> documents = new LinkedList<>();
+  private final List<MascDocument> documents = new LinkedList<>();
   private Iterator<MascDocument> documentIterator;
-  private SAXParser saxParser;
+  private final SAXParser saxParser;
 
   public MascDocumentStream(File mascCorpusDirectory) throws IOException {
-    FileFilter fileFilter = pathname -> pathname.getName().contains("");
-    new MascDocumentStream(mascCorpusDirectory, true, fileFilter);
+    this(mascCorpusDirectory, true, pathname -> pathname.getName().contains(""));
   }
 
   /**
@@ -200,6 +199,7 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
    * Reset the reading of all documents to the first sentence.
    * Reset the corpus to the first document.
    */
+  @Override
   public void reset() {
     for (MascDocument doc : documents) {
       doc.reset();
@@ -213,6 +213,7 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
    * @return A corpus document with all its annotations.
    * @throws IOException if anything goes wrong.
    */
+  @Override
   public MascDocument read() throws IOException {
 
     MascDocument doc = null;
@@ -227,8 +228,8 @@ public class MascDocumentStream implements ObjectStream<MascDocument> {
   /**
    * Remove the corpus from the memory.
    */
+  @Override
   public void close() {
-    documents = null;
     documentIterator = null;
   }
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
index c3577e33..6ec7e4de 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
@@ -73,23 +73,23 @@ public class ParserCrossValidator {
     CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);
 
     while (partitioner.hasNext()) {
-      CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();
-
-      ParserModel model;
-      if (ParserType.CHUNKING.equals(parserType)) {
-        model = opennlp.tools.parser.chunking.Parser.train(languageCode, samples, rules, params);
-      }
-      else if (ParserType.TREEINSERT.equals(parserType)) {
-        model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, samples, rules, params);
-      }
-      else {
-        throw new IllegalStateException("Unexpected parser type: " + parserType);
+      try (CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next()) {
+        ParserModel model;
+        if (ParserType.CHUNKING.equals(parserType)) {
+          model = opennlp.tools.parser.chunking.Parser.train(languageCode, samples, rules, params);
+        }
+        else if (ParserType.TREEINSERT.equals(parserType)) {
+          model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, samples, rules, params);
+        }
+        else {
+          throw new IllegalStateException("Unexpected parser type: " + parserType);
+        }
+
+        ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
+        evaluator.evaluate(trainingSampleStream.getTestSampleStream());
+
+        fmeasure.mergeInto(evaluator.getFMeasure());
       }
-
-      ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
-      evaluator.evaluate(trainingSampleStream.getTestSampleStream());
-
-      fmeasure.mergeInto(evaluator.getFMeasure());
     }
   }
 
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
index 8e04b929..25e57e4e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
@@ -31,63 +31,63 @@ public class NameFinderCensus90NameStreamTest extends AbstractSampleStreamTest {
   @Test
   void testParsingEnglishSample() throws IOException {
 
-    ObjectStream<StringList> sampleStream = openData();
+    try (ObjectStream<StringList> sampleStream = openData()) {
+      StringList personName = sampleStream.read();
 
-    StringList personName = sampleStream.read();
+      // verify the first 5 taken from the Surname data
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Smith", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Johnson", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Williams", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Jones", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Brown", personName.getToken(0));
 
-    // verify the first 5 taken from the Surname data
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Smith", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Johnson", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Williams", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Jones", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Brown", personName.getToken(0));
+      // verify the next 5 taken from the female names
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Mary", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Patricia", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Linda", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Barbara", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Elizabeth", personName.getToken(0));
 
-    // verify the next 5 taken from the female names
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Mary", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Patricia", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Linda", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Barbara", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Elizabeth", personName.getToken(0));
+      // verify the last 5 taken from the male names
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("James", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("John", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Robert", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("Michael", personName.getToken(0));
+      personName = sampleStream.read();
+      Assertions.assertNotNull(personName);
+      Assertions.assertEquals("William", personName.getToken(0));
 
-    // verify the last 5 taken from the male names
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("James", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("John", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Robert", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("Michael", personName.getToken(0));
-    personName = sampleStream.read();
-    Assertions.assertNotNull(personName);
-    Assertions.assertEquals("William", personName.getToken(0));
-
-    // verify the end of the file.
-    personName = sampleStream.read();
-    Assertions.assertNull(personName);
+      // verify the end of the file.
+      personName = sampleStream.read();
+      Assertions.assertNull(personName);
+    }
   }
 
   private ObjectStream<StringList> openData() throws IOException {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
index 3bbc33c9..a9428bb5 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamTest.java
@@ -40,12 +40,11 @@ public class LeipzigLanguageSampleStreamTest {
 
   @Test
   void testReadSentenceFiles() {
-
     int samplesPerLanguage = 2;
     int sentencesPerSample = 1;
-    try {
-      LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(new File(testDataPath),
-          sentencesPerSample, samplesPerLanguage);
+    try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(new File(testDataPath),
+            sentencesPerSample, samplesPerLanguage)) {
+      
       int count = 0;
       while (stream.read() != null) {
         count++;
@@ -64,22 +63,22 @@ public class LeipzigLanguageSampleStreamTest {
       int samplesPerLanguage = 2;
       int sentencesPerSample = 2;
 
-      LeipzigLanguageSampleStream stream =
-          new LeipzigLanguageSampleStream(new File(testDataPath),
-              sentencesPerSample, samplesPerLanguage);
-      while (stream.read() != null) ;
+      try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(
+              new File(testDataPath), sentencesPerSample, samplesPerLanguage)) {
+
+        while (stream.read() != null) ;
+      }
 
     });
   }
 
   @Test
   void testReadSentenceFilesWithEmptyDir() {
-
     int samplesPerLanguage = 2;
     int sentencesPerSample = 1;
-    try {
-      LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(emptyTempDir,
-          sentencesPerSample, samplesPerLanguage);
+    try (LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(
+            emptyTempDir, sentencesPerSample, samplesPerLanguage)) {
+
       int count = 0;
       while (stream.read() != null) {
         count++;
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
index bd7b97e7..0e8649d9 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
@@ -40,11 +40,12 @@ public class ParseSampleStreamTest {
 
   @Test
   void testReadTestStream() throws IOException {
-    ObjectStream<Parse> parseStream = createParseSampleStream();
-    Assertions.assertNotNull(parseStream.read());
-    Assertions.assertNotNull(parseStream.read());
-    Assertions.assertNotNull(parseStream.read());
-    Assertions.assertNotNull(parseStream.read());
-    Assertions.assertNull(parseStream.read());
+    try (ObjectStream<Parse> parseStream = createParseSampleStream()) {
+      Assertions.assertNotNull(parseStream.read());
+      Assertions.assertNotNull(parseStream.read());
+      Assertions.assertNotNull(parseStream.read());
+      Assertions.assertNotNull(parseStream.read());
+      Assertions.assertNull(parseStream.read());
+    }
   }
 }