You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/04/28 18:57:05 UTC

[opennlp-sandbox] branch master updated: Fixes OPENNLP-1486 (#102)

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/master by this push:
     new 33eaa52  Fixes OPENNLP-1486 (#102)
33eaa52 is described below

commit 33eaa52473cd69cb6a409de76ff19269cf27de2e
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Fri Apr 28 20:57:01 2023 +0200

    Fixes OPENNLP-1486 (#102)
    
    switches from `java.io.FileWriter` to `java.nio.file.Files.newBufferedWriter` with explicit use of UTF-8 charset
---
 .../modelbuilder/impls/GenericModelableImpl.java   |  9 ++++-
 .../resolver/DefaultNonReferentialResolver.java    | 10 ++++-
 .../tools/coref/resolver/MaxentResolver.java       | 10 ++++-
 .../java/opennlp/tools/coref/sim/GenderModel.java  | 10 ++++-
 .../opennlp/tools/coref/sim/SimilarityModel.java   | 10 ++++-
 opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java | 33 +++++++++-------
 .../src/main/java/opennlp/tools/dl/StackedRNN.java | 45 ++++++++++++----------
 .../chunker2matcher/ParserCacheSerializer.java     | 21 ++++++----
 pom.xml                                            |  2 +
 9 files changed, 99 insertions(+), 51 deletions(-)

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index 2df6a9e..68b371b 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -18,10 +18,13 @@ package opennlp.addons.modelbuilder.impls;
 import java.io.BufferedOutputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.Writer;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.logging.Level;
@@ -59,7 +62,9 @@ public class GenericModelableImpl implements Modelable {
 
   @Override
   public void writeAnnotatedSentences() {
-    try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) {
+    final Path p = params.getAnnotatedTrainingDataFile().toPath();
+    try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+            StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
       for (String s : annotatedSentences) {
         writer.write(s.replace("\n", " ").trim() + "\n");
       }
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
index f6475a6..6759b26 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
@@ -21,8 +21,12 @@ import java.io.BufferedInputStream;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -127,7 +131,9 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
     if (ResolverMode.TRAIN == mode) {
       System.err.println(this + " referential");
       if (debugOn) {
-        try (FileWriter writer = new FileWriter(modelName + ".events")) {
+        Path p = Path.of(modelName + ".events");
+        try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+                StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
           for (Event e : events) {
             writer.write(e.toString() + "\n");
           }
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index 323e863..3b233e6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -21,8 +21,12 @@ import java.io.BufferedInputStream;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -340,7 +344,9 @@ public abstract class MaxentResolver extends AbstractResolver {
     if (ResolverMode.TRAIN == mode) {
       if (DEBUG) {
         System.err.println(this + " referential");
-        try (FileWriter writer = new FileWriter(modelName + ".events")) {
+        Path p = Path.of(modelName + ".events");
+        try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+                StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
           for (Event e : events) {
             writer.write(e.toString() + "\n");
           }
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index ac86dd4..ef5e753 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -24,9 +24,13 @@ import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -251,7 +255,9 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
   @Override
   public void trainModel() throws IOException {
     if (debugOn) {
-      try (FileWriter writer = new FileWriter(modelName + ".events")) {
+      Path p = Path.of(modelName + ".events");
+      try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+              StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
         for (Event e : events) {
           writer.write(e.toString() + "\n");
         }
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
index 34b8472..143d575 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
@@ -22,9 +22,13 @@ import java.io.BufferedReader;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -356,7 +360,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
   @Override
   public void trainModel() throws IOException {
     if (debugOn) {
-      try (FileWriter writer = new FileWriter(modelName + ".events")) {
+      Path p = Path.of(modelName + ".events");
+      try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+              StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
         for (Event e : events) {
           writer.write(e.toString() + "\n");
         }
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
index 3c143c1..2e17d65 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
@@ -19,9 +19,12 @@
 
 package opennlp.tools.dl;
 
-import java.io.BufferedWriter;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
@@ -349,18 +352,20 @@ public class RNN {
   }
 
   public void serialize(String prefix) throws IOException {
-    try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(prefix + new Date() + ".txt"))) {
-      bufferedWriter.write("wxh");
-      bufferedWriter.write(wxh.toString());
-      bufferedWriter.write("whh");
-      bufferedWriter.write(whh.toString());
-      bufferedWriter.write("why");
-      bufferedWriter.write(why.toString());
-      bufferedWriter.write("bh");
-      bufferedWriter.write(bh.toString());
-      bufferedWriter.write("by");
-      bufferedWriter.write(by.toString());
-      bufferedWriter.flush();
+    Path p = Path.of(prefix + new Date() + ".txt");
+    try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+            StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
+      writer.write("wxh");
+      writer.write(wxh.toString());
+      writer.write("whh");
+      writer.write(whh.toString());
+      writer.write("why");
+      writer.write(why.toString());
+      writer.write("bh");
+      writer.write(bh.toString());
+      writer.write("by");
+      writer.write(by.toString());
+      writer.flush();
     }
   }
 }
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
index f60414c..391170b 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
@@ -19,9 +19,12 @@
 
 package opennlp.tools.dl;
 
-import java.io.BufferedWriter;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.Date;
 import java.util.LinkedList;
 import java.util.List;
@@ -336,24 +339,26 @@ public class StackedRNN extends RNN {
 
   @Override
   public void serialize(String prefix) throws IOException {
-    try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(prefix + new Date() + ".txt"))) {
-      bufferedWriter.write("wxh");
-      bufferedWriter.write(wxh.toString());
-      bufferedWriter.write("whh");
-      bufferedWriter.write(whh.toString());
-      bufferedWriter.write("wxh2");
-      bufferedWriter.write(wxh2.toString());
-      bufferedWriter.write("whh2");
-      bufferedWriter.write(whh2.toString());
-      bufferedWriter.write("wh2y");
-      bufferedWriter.write(wh2y.toString());
-      bufferedWriter.write("bh");
-      bufferedWriter.write(bh.toString());
-      bufferedWriter.write("bh2");
-      bufferedWriter.write(bh2.toString());
-      bufferedWriter.write("by");
-      bufferedWriter.write(by.toString());
-      bufferedWriter.flush();
+    Path p = Path.of(prefix + new Date() + ".txt");
+    try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+            StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
+      writer.write("wxh");
+      writer.write(wxh.toString());
+      writer.write("whh");
+      writer.write(whh.toString());
+      writer.write("wxh2");
+      writer.write(wxh2.toString());
+      writer.write("whh2");
+      writer.write(whh2.toString());
+      writer.write("wh2y");
+      writer.write(wh2y.toString());
+      writer.write("bh");
+      writer.write(bh.toString());
+      writer.write("bh2");
+      writer.write(bh2.toString());
+      writer.write("by");
+      writer.write(by.toString());
+      writer.flush();
     }
   }
 
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
index b7e4611..887a6ad 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
@@ -33,14 +33,19 @@
 
 package opennlp.tools.textsimilarity.chunker2matcher;
 
+import java.io.BufferedInputStream;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -71,9 +76,11 @@ public class ParserCacheSerializer {
     } else {
 
       Map<String, String[][]> sentence_parseObject = (Map<String, String[][]>) objectToSerialize;
-      List<String> keys = new ArrayList<>(sentence_parseObject.keySet());
-      try (CSVWriter writer = new CSVWriter(new FileWriter(
-              RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV, false))) {
+      final List<String> keys = new ArrayList<>(sentence_parseObject.keySet());
+
+      final Path p = Path.of(RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV);
+      try (CSVWriter writer = new CSVWriter(Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+              StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING))) {
         for (String k : keys) {
           String[][] triplet = sentence_parseObject.get(k);
           writer.writeNext(new String[] { k });
@@ -92,7 +99,7 @@ public class ParserCacheSerializer {
     if (JAVA_OBJECT_SERIALIZATION) {
       String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME;
       Object data = null;
-      try (FileInputStream fis = new FileInputStream(filename);
+      try (InputStream fis = new BufferedInputStream(new FileInputStream(filename));
            ObjectInputStream in = new ObjectInputStream(fis)) {
 
         data = in.readObject();
@@ -109,8 +116,8 @@ public class ParserCacheSerializer {
               + PARSE_CACHE_FILE_NAME_CSV), ',')) {
         lines = reader.readAll();
       } catch (FileNotFoundException e) {
-    	  if (JAVA_OBJECT_SERIALIZATION)
-    		  System.err.println("Cannot find cache file");
+        if (JAVA_OBJECT_SERIALIZATION)
+          System.err.println("Cannot find cache file");
         return null;
       } catch (IOException ioe) {
         ioe.printStackTrace();
diff --git a/pom.xml b/pom.xml
index 6494209..77ec83f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -442,6 +442,8 @@
                                 <exclude>**/src/main/java/opennlp/tools/similarity/apps/gen.txt</exclude>
                                 <!-- These files are samples in wikinews-importer -->
                                 <exclude>**/samples/*.xmi</exclude>
+                                <!-- This is a log file of DerbyDB being created during test runs -->
+                                <exclude>**/derby.log</exclude>
                             </excludes>
                         </configuration>
                     </execution>