You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/04/21 08:10:32 UTC
[opennlp-sandbox] 01/01: switches from `java.io.FileWriter` to `java.nio.file.Files.newBufferedWriter` with explicit use of UTF-8 charset; resolves OPENNLP-1486
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch OPENNLP-1486_Switch_to_BufferedWriter_in_various_sandbox_components
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit efd3c236276021a9729897826726258719c07a8b
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Fri Apr 21 10:10:24 2023 +0200
switches from `java.io.FileWriter` to `java.nio.file.Files.newBufferedWriter` with explicit use of UTF-8 charset
resolves OPENNLP-1486
---
.../modelbuilder/impls/GenericModelableImpl.java | 9 ++++-
.../resolver/DefaultNonReferentialResolver.java | 10 ++++-
.../tools/coref/resolver/MaxentResolver.java | 10 ++++-
.../java/opennlp/tools/coref/sim/GenderModel.java | 10 ++++-
.../opennlp/tools/coref/sim/SimilarityModel.java | 10 ++++-
opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java | 33 +++++++++-------
.../src/main/java/opennlp/tools/dl/StackedRNN.java | 45 ++++++++++++----------
.../chunker2matcher/ParserCacheSerializer.java | 21 ++++++----
pom.xml | 2 +
9 files changed, 99 insertions(+), 51 deletions(-)
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index 2df6a9e..68b371b 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -18,10 +18,13 @@ package opennlp.addons.modelbuilder.impls;
import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.Writer;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
@@ -59,7 +62,9 @@ public class GenericModelableImpl implements Modelable {
@Override
public void writeAnnotatedSentences() {
- try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) {
+ final Path p = params.getAnnotatedTrainingDataFile().toPath();
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
for (String s : annotatedSentences) {
writer.write(s.replace("\n", " ").trim() + "\n");
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
index f6475a6..6759b26 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
@@ -21,8 +21,12 @@ import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
@@ -127,7 +131,9 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
if (ResolverMode.TRAIN == mode) {
System.err.println(this + " referential");
if (debugOn) {
- try (FileWriter writer = new FileWriter(modelName + ".events")) {
+ Path p = Path.of(modelName + ".events");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
for (Event e : events) {
writer.write(e.toString() + "\n");
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index 323e863..3b233e6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -21,8 +21,12 @@ import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
@@ -340,7 +344,9 @@ public abstract class MaxentResolver extends AbstractResolver {
if (ResolverMode.TRAIN == mode) {
if (DEBUG) {
System.err.println(this + " referential");
- try (FileWriter writer = new FileWriter(modelName + ".events")) {
+ Path p = Path.of(modelName + ".events");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
for (Event e : events) {
writer.write(e.toString() + "\n");
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index ac86dd4..ef5e753 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -24,9 +24,13 @@ import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -251,7 +255,9 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
@Override
public void trainModel() throws IOException {
if (debugOn) {
- try (FileWriter writer = new FileWriter(modelName + ".events")) {
+ Path p = Path.of(modelName + ".events");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
for (Event e : events) {
writer.write(e.toString() + "\n");
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
index 34b8472..143d575 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
@@ -22,9 +22,13 @@ import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -356,7 +360,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
@Override
public void trainModel() throws IOException {
if (debugOn) {
- try (FileWriter writer = new FileWriter(modelName + ".events")) {
+ Path p = Path.of(modelName + ".events");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
for (Event e : events) {
writer.write(e.toString() + "\n");
}
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
index 3c143c1..2e17d65 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
@@ -19,9 +19,12 @@
package opennlp.tools.dl;
-import java.io.BufferedWriter;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
@@ -349,18 +352,20 @@ public class RNN {
}
public void serialize(String prefix) throws IOException {
- try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(prefix + new Date() + ".txt"))) {
- bufferedWriter.write("wxh");
- bufferedWriter.write(wxh.toString());
- bufferedWriter.write("whh");
- bufferedWriter.write(whh.toString());
- bufferedWriter.write("why");
- bufferedWriter.write(why.toString());
- bufferedWriter.write("bh");
- bufferedWriter.write(bh.toString());
- bufferedWriter.write("by");
- bufferedWriter.write(by.toString());
- bufferedWriter.flush();
+ Path p = Path.of(prefix + new Date() + ".txt");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
+ writer.write("wxh");
+ writer.write(wxh.toString());
+ writer.write("whh");
+ writer.write(whh.toString());
+ writer.write("why");
+ writer.write(why.toString());
+ writer.write("bh");
+ writer.write(bh.toString());
+ writer.write("by");
+ writer.write(by.toString());
+ writer.flush();
}
}
}
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
index f60414c..391170b 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
@@ -19,9 +19,12 @@
package opennlp.tools.dl;
-import java.io.BufferedWriter;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
@@ -336,24 +339,26 @@ public class StackedRNN extends RNN {
@Override
public void serialize(String prefix) throws IOException {
- try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(prefix + new Date() + ".txt"))) {
- bufferedWriter.write("wxh");
- bufferedWriter.write(wxh.toString());
- bufferedWriter.write("whh");
- bufferedWriter.write(whh.toString());
- bufferedWriter.write("wxh2");
- bufferedWriter.write(wxh2.toString());
- bufferedWriter.write("whh2");
- bufferedWriter.write(whh2.toString());
- bufferedWriter.write("wh2y");
- bufferedWriter.write(wh2y.toString());
- bufferedWriter.write("bh");
- bufferedWriter.write(bh.toString());
- bufferedWriter.write("bh2");
- bufferedWriter.write(bh2.toString());
- bufferedWriter.write("by");
- bufferedWriter.write(by.toString());
- bufferedWriter.flush();
+ Path p = Path.of(prefix + new Date() + ".txt");
+ try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
+ writer.write("wxh");
+ writer.write(wxh.toString());
+ writer.write("whh");
+ writer.write(whh.toString());
+ writer.write("wxh2");
+ writer.write(wxh2.toString());
+ writer.write("whh2");
+ writer.write(whh2.toString());
+ writer.write("wh2y");
+ writer.write(wh2y.toString());
+ writer.write("bh");
+ writer.write(bh.toString());
+ writer.write("bh2");
+ writer.write(bh2.toString());
+ writer.write("by");
+ writer.write(by.toString());
+ writer.flush();
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
index b7e4611..887a6ad 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
@@ -33,14 +33,19 @@
package opennlp.tools.textsimilarity.chunker2matcher;
+import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -71,9 +76,11 @@ public class ParserCacheSerializer {
} else {
Map<String, String[][]> sentence_parseObject = (Map<String, String[][]>) objectToSerialize;
- List<String> keys = new ArrayList<>(sentence_parseObject.keySet());
- try (CSVWriter writer = new CSVWriter(new FileWriter(
- RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV, false))) {
+ final List<String> keys = new ArrayList<>(sentence_parseObject.keySet());
+
+ final Path p = Path.of(RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV);
+ try (CSVWriter writer = new CSVWriter(Files.newBufferedWriter(p, StandardCharsets.UTF_8,
+ StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING))) {
for (String k : keys) {
String[][] triplet = sentence_parseObject.get(k);
writer.writeNext(new String[] { k });
@@ -92,7 +99,7 @@ public class ParserCacheSerializer {
if (JAVA_OBJECT_SERIALIZATION) {
String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME;
Object data = null;
- try (FileInputStream fis = new FileInputStream(filename);
+ try (InputStream fis = new BufferedInputStream(new FileInputStream(filename));
ObjectInputStream in = new ObjectInputStream(fis)) {
data = in.readObject();
@@ -109,8 +116,8 @@ public class ParserCacheSerializer {
+ PARSE_CACHE_FILE_NAME_CSV), ',')) {
lines = reader.readAll();
} catch (FileNotFoundException e) {
- if (JAVA_OBJECT_SERIALIZATION)
- System.err.println("Cannot find cache file");
+ if (JAVA_OBJECT_SERIALIZATION)
+ System.err.println("Cannot find cache file");
return null;
} catch (IOException ioe) {
ioe.printStackTrace();
diff --git a/pom.xml b/pom.xml
index f15bca7..ac0aca2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -414,6 +414,8 @@
<exclude>**/src/main/java/opennlp/tools/similarity/apps/gen.txt</exclude>
<!-- These files are samples in wikinews-importer -->
<exclude>**/samples/*.xmi</exclude>
+ <!-- This is a log file of DerbyDB being created during test runs -->
+ <exclude>**/derby.log</exclude>
</excludes>
</configuration>
</execution>