You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by rz...@apache.org on 2023/01/11 09:15:02 UTC
[opennlp] branch main updated: OPENNLP-1440 Ensure files are read via buffered IO operations
This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new cdb85d08 OPENNLP-1440 Ensure files are read via buffered IO operations
cdb85d08 is described below
commit cdb85d08b961620fc0a3296eb5b4febdb5806216
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Mon Jan 9 21:41:29 2023 +0100
OPENNLP-1440 Ensure files are read via buffered IO operations
- reworks existing classes that were not using buffered IO for reading files
- removes an unnecessary dependency of `POSTaggerFactory` on `TokenNameFinderFactory` (it was only used to load a classpath resource)
---
.../java/opennlp/tools/cmdline/CmdLineUtil.java | 3 ++-
.../java/opennlp/tools/cmdline/ModelLoader.java | 2 +-
.../cmdline/dictionary/DictionaryBuilderTool.java | 6 ++++--
.../languagemodel/NGramLanguageModelTool.java | 4 +++-
.../namefind/CensusDictionaryCreatorTool.java | 3 +--
.../namefind/TokenNameFinderTrainerTool.java | 10 ++++++---
.../sentdetect/SentenceDetectorTrainerTool.java | 3 ++-
.../cmdline/tokenizer/TokenizerTrainerTool.java | 3 ++-
.../java/opennlp/tools/dictionary/Dictionary.java | 2 +-
.../tools/entitylinker/EntityLinkerProperties.java | 3 ++-
.../formats/DetokenizerSampleStreamFactory.java | 4 ++--
.../IrishSentenceBankDocument.java | 3 ++-
.../tools/formats/letsmt/LetsmtDocument.java | 3 ++-
.../formats/nkjp/NKJPSegmentationDocument.java | 3 ++-
.../tools/formats/nkjp/NKJPTextDocument.java | 3 ++-
.../tools/lemmatizer/DictionaryLemmatizer.java | 3 ++-
.../tools/namefind/TokenNameFinderFactory.java | 15 +++++++------
.../opennlp/tools/postag/POSTaggerFactory.java | 25 +++++++++++++---------
.../tools/cmdline/TokenNameFinderToolTest.java | 3 ++-
.../java/opennlp/tools/eval/AbstractEvalTest.java | 5 +++--
.../tools/namefind/TokenNameFinderModelTest.java | 8 ++-----
21 files changed, 67 insertions(+), 47 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
index 863a86de..e645f2ff 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
@@ -17,6 +17,7 @@
package opennlp.tools.cmdline;
+import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
@@ -319,7 +320,7 @@ public final class CmdLineUtil {
checkInputFile("Training Parameter", new File(paramFile));
- try (InputStream paramsIn = new FileInputStream(new File(paramFile))) {
+ try (InputStream paramsIn = new BufferedInputStream(new FileInputStream(paramFile))) {
params = new opennlp.tools.util.TrainingParameters(paramsIn);
} catch (IOException e) {
throw new TerminateToolException(-1, "Error during parameters loading: " + e.getMessage(), e);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java
index e44a8737..4d2dbeb4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java
@@ -54,7 +54,7 @@ public abstract class ModelLoader<T> {
T model;
try (InputStream modelIn = new BufferedInputStream(
- CmdLineUtil.openInFile(modelFile), CmdLineUtil.IO_BUFFER_SIZE)) {
+ CmdLineUtil.openInFile(modelFile), CmdLineUtil.IO_BUFFER_SIZE)) {
model = loadModel(modelIn);
}
catch (InvalidFormatException e) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java
index 20c046dd..752e346c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java
@@ -17,12 +17,14 @@
package opennlp.tools.cmdline.dictionary;
+import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
+import java.io.Reader;
import java.nio.charset.Charset;
import opennlp.tools.cmdline.BasicCmdLineTool;
@@ -56,8 +58,8 @@ public class DictionaryBuilderTool extends BasicCmdLineTool {
CmdLineUtil.checkInputFile("dictionary input file", dictInFile);
CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile);
- try (InputStreamReader in = new InputStreamReader(new FileInputStream(dictInFile), encoding);
- OutputStream out = new FileOutputStream(dictOutFile)) {
+ try (Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(dictInFile), encoding));
+ OutputStream out = new FileOutputStream(dictOutFile)) {
Dictionary dict = Dictionary.parseOneEntryPerLine(in);
dict.serialize(out);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
index 2231ba76..206480ba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
@@ -16,9 +16,11 @@
*/
package opennlp.tools.cmdline.languagemodel;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.util.Arrays;
import opennlp.tools.cmdline.BasicCmdLineTool;
@@ -44,7 +46,7 @@ public class NGramLanguageModelTool extends BasicCmdLineTool {
@Override
public void run(String[] args) {
File lmFile = new File(args[0]);
- try (FileInputStream stream = new FileInputStream(lmFile)) {
+ try (InputStream stream = new BufferedInputStream(new FileInputStream(lmFile))) {
NGramLanguageModel nGramLanguageModel = new NGramLanguageModel(stream);
ObjectStream<String> lineStream;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index f9bf5e09..686378ba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -109,8 +109,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool {
InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(testData);
Dictionary mDictionary;
- try (
- ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(
+ try (ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(
sampleDataIn, Charset.forName(params.getEncoding()))) {
System.out.println("Creating Dictionary...");
mDictionary = createDictionary(sampleStream);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index c986b762..f21c6ef1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -17,6 +17,7 @@
package opennlp.tools.cmdline.namefind;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -69,7 +70,8 @@ public final class TokenNameFinderTrainerTool
// load descriptor file into memory
if (featureGenDescriptorFile != null) {
- try (InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile)) {
+ try (InputStream bytesIn = new BufferedInputStream(
+ CmdLineUtil.openInFile(featureGenDescriptorFile))) {
featureGeneratorBytes = ModelUtil.read(bytesIn);
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
@@ -96,7 +98,8 @@ public final class TokenNameFinderTrainerTool
if (featureGenDescriptor != null) {
- try (InputStream xmlDescriptorIn = CmdLineUtil.openInFile(featureGenDescriptor)) {
+ try (InputStream xmlDescriptorIn = new BufferedInputStream(
+ CmdLineUtil.openInFile(featureGenDescriptor))) {
artifactSerializers.putAll(
GeneratorFactory.extractArtifactSerializerMappings(xmlDescriptorIn));
}
@@ -104,7 +107,8 @@ public final class TokenNameFinderTrainerTool
for (Map.Entry<String, ArtifactSerializer<?>> serializerMapping : artifactSerializers.entrySet()) {
String resourceName = serializerMapping.getKey();
- try (InputStream resourceIn = CmdLineUtil.openInFile(new File(resourcePath, resourceName))) {
+ try (InputStream resourceIn = new BufferedInputStream(
+ CmdLineUtil.openInFile(new File(resourcePath, resourceName)))) {
resources.put(resourceName, serializerMapping.getValue().create(resourceIn));
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
index 581bf124..15378f33 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
@@ -17,6 +17,7 @@
package opennlp.tools.cmdline.sentdetect;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -55,7 +56,7 @@ public final class SentenceDetectorTrainerTool
Dictionary dict = null;
if (f != null) {
CmdLineUtil.checkInputFile("abb dict", f);
- dict = new Dictionary(new FileInputStream(f));
+ dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
}
return dict;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
index 3cb1735c..1af1363b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
@@ -17,6 +17,7 @@
package opennlp.tools.cmdline.tokenizer;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -54,7 +55,7 @@ public final class TokenizerTrainerTool
Dictionary dict = null;
if (f != null) {
CmdLineUtil.checkInputFile("abb dict", f);
- dict = new Dictionary(new FileInputStream(f));
+ dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
}
return dict;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index f3333d4d..d47c821e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -272,7 +272,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
public static Dictionary parseOneEntryPerLine(Reader in) throws IOException {
BufferedReader lineReader = new BufferedReader(in);
- Dictionary dictionary = new Dictionary();
+ final Dictionary dictionary = new Dictionary();
String line;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
index 026efaf0..0e0a53a1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
@@ -17,6 +17,7 @@
package opennlp.tools.entitylinker;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -41,7 +42,7 @@ public class EntityLinkerProperties {
* @throws IOException Thrown if IO errors occurred.
*/
public EntityLinkerProperties(File propertiesfile) throws IOException {
- try (InputStream stream = new FileInputStream(propertiesfile)) {
+ try (InputStream stream = new BufferedInputStream(new FileInputStream(propertiesfile))) {
init(stream);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
index 0b018a2d..c1e23df3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
@@ -17,7 +17,7 @@
package opennlp.tools.formats;
-import java.io.File;
+import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
@@ -39,7 +39,7 @@ public abstract class DetokenizerSampleStreamFactory<T, P> extends AbstractSampl
protected Detokenizer createDetokenizer(DetokenizerParameter p) {
try {
return new DictionaryDetokenizer(new DetokenizationDictionary(
- new FileInputStream(new File(p.getDetokenizer()))));
+ new BufferedInputStream(new FileInputStream(p.getDetokenizer()))));
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while loading detokenizer dict: " + e.getMessage(), e);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocument.java
index 8777fa13..6883e035 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/irishsentencebank/IrishSentenceBankDocument.java
@@ -17,6 +17,7 @@
package opennlp.tools.formats.irishsentencebank;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -299,7 +300,7 @@ public class IrishSentenceBankDocument {
* @throws IOException Thrown if IO errors occurred.
*/
static IrishSentenceBankDocument parse(File file) throws IOException {
- try (InputStream in = Files.newInputStream(file.toPath())) {
+ try (InputStream in = new BufferedInputStream(Files.newInputStream(file.toPath()))) {
return parse(in);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtDocument.java
index 60b6bfc0..5b02fa32 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtDocument.java
@@ -17,6 +17,7 @@
package opennlp.tools.formats.letsmt;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -153,7 +154,7 @@ public class LetsmtDocument {
* @throws IOException Thrown if IO errors occurred during loading or parsing.
*/
static LetsmtDocument parse(File file) throws IOException {
- try (InputStream in = new FileInputStream(file)) {
+ try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
return parse(in);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocument.java
index b377460d..0ad6fa3e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocument.java
@@ -17,6 +17,7 @@
package opennlp.tools.formats.nkjp;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -247,7 +248,7 @@ public class NKJPSegmentationDocument {
}
static NKJPSegmentationDocument parse(File file) throws IOException {
- try (InputStream in = new FileInputStream(file)) {
+ try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
return parse(in);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPTextDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPTextDocument.java
index a28aaf0e..3c20e704 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPTextDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPTextDocument.java
@@ -17,6 +17,7 @@
package opennlp.tools.formats.nkjp;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -138,7 +139,7 @@ public class NKJPTextDocument {
}
static NKJPTextDocument parse(File file) throws IOException {
- try (InputStream in = new FileInputStream(file)) {
+ try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
return parse(in);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index 1416643d..53f53877 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -17,6 +17,7 @@
package opennlp.tools.lemmatizer;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
@@ -114,7 +115,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
* {@code dictionaryFile}.
*/
public DictionaryLemmatizer(File dictionaryFile, Charset charset) throws IOException {
- try (InputStream in = new FileInputStream(dictionaryFile)) {
+ try (InputStream in = new BufferedInputStream(new FileInputStream(dictionaryFile))) {
init(in, charset);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
index 4a87ed14..c451c10d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
@@ -17,6 +17,7 @@
package opennlp.tools.namefind;
+import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -90,13 +91,13 @@ public class TokenNameFinderFactory extends BaseToolFactory {
private static byte[] loadDefaultFeatureGeneratorBytes() {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
- try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
- "/opennlp/tools/namefind/ner-default-features.xml")) {
-
- if (in == null) {
- throw new IllegalStateException("Classpath must contain 'ner-default-features.xml' file!");
- }
-
+ InputStream resource = TokenNameFinderFactory.class.getResourceAsStream(
+ "/opennlp/tools/namefind/ner-default-features.xml");
+ if (resource == null) {
+ throw new IllegalStateException("Classpath must contain 'ner-default-features.xml' file!");
+ }
+
+ try (InputStream in = new BufferedInputStream(resource)) {
byte[] buf = new byte[1024];
int len;
while ((len = in.read(buf)) > 0) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index 9efffbf6..8f022704 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -17,6 +17,7 @@
package opennlp.tools.postag;
+import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -32,7 +33,6 @@ import java.util.Set;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceValidator;
@@ -106,16 +106,20 @@ public class POSTaggerFactory extends BaseToolFactory {
this.resources = resources;
this.posDictionary = posDictionary;
}
+
+ /*
+ * Loads the default feature generator bytes via classpath resources.
+ */
private static byte[] loadDefaultFeatureGeneratorBytes() {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
- try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
- "/opennlp/tools/postag/pos-default-features.xml")) {
-
- if (in == null) {
- throw new IllegalStateException("Classpath must contain pos-default-features.xml file!");
- }
+ InputStream resource = POSTaggerFactory.class.getResourceAsStream(
+ "/opennlp/tools/postag/pos-default-features.xml");
+ if (resource == null) {
+ throw new IllegalStateException("Classpath must contain 'pos-default-features.xml' file!");
+ }
+ try (InputStream in = new BufferedInputStream(resource)) {
byte[] buf = new byte[1024];
int len;
while ((len = in.read(buf)) > 0) {
@@ -123,7 +127,7 @@ public class POSTaggerFactory extends BaseToolFactory {
}
}
catch (IOException e) {
- throw new IllegalStateException("Failed reading from pos-default-features.xml file on classpath!");
+ throw new IllegalStateException("Failed reading from 'pos-default-features.xml' file on classpath!");
}
return bytes.toByteArray();
@@ -137,6 +141,7 @@ public class POSTaggerFactory extends BaseToolFactory {
* The generators are created on every call to this method.
*
* @return the feature generator or {@code null} if there is no descriptor in the model
+ * @throws IllegalStateException Thrown if inconsistencies occurred during creation.
*/
public AdaptiveFeatureGenerator createFeatureGenerators() {
@@ -216,7 +221,7 @@ public class POSTaggerFactory extends BaseToolFactory {
*/
public TagDictionary createTagDictionary(File dictionary)
throws IOException {
- return createTagDictionary(new FileInputStream(dictionary));
+ return createTagDictionary(new BufferedInputStream(new FileInputStream(dictionary)));
}
/**
@@ -267,7 +272,7 @@ public class POSTaggerFactory extends BaseToolFactory {
return this.posDictionary;
}
- @Deprecated // will be removed when only 8 series models are supported
+ @Deprecated(forRemoval = true) // will be removed when only 8 series models are supported
private Dictionary getDictionary() {
if (this.ngramDictionary == null && artifactProvider != null)
this.ngramDictionary = artifactProvider.getArtifact(NGRAM_DICTIONARY_ENTRY_NAME);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
index 9dbd50b7..80af374d 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -24,6 +24,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
@@ -123,7 +124,7 @@ public class TokenNameFinderToolTest {
File modelFile = Files.createTempFile("model", ".bin").toFile();
- try (BufferedOutputStream modelOut =
+ try (OutputStream modelOut =
new BufferedOutputStream(new FileOutputStream(modelFile))) {
model.serialize(modelOut);
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/AbstractEvalTest.java b/opennlp-tools/src/test/java/opennlp/tools/eval/AbstractEvalTest.java
index e65e17b5..30350a69 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/AbstractEvalTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/AbstractEvalTest.java
@@ -17,6 +17,7 @@
package opennlp.tools.eval;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
@@ -63,7 +64,7 @@ public abstract class AbstractEvalTest {
MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
- try (InputStream in = Files.newInputStream(file)) {
+ try (InputStream in = new BufferedInputStream(Files.newInputStream(file))) {
byte[] buf = new byte[65536];
int len;
while ((len = in.read(buf)) > 0) {
@@ -89,7 +90,7 @@ public abstract class AbstractEvalTest {
Collections.sort(paths);
for (Path p : paths) {
- try (InputStream in = Files.newInputStream(p)) {
+ try (InputStream in = new BufferedInputStream(Files.newInputStream(p))) {
byte[] buf = new byte[65536];
int len;
while ((len = in.read(buf)) > 0) {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
index 2ca843fd..1aa8837c 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
@@ -96,15 +96,11 @@ public class TokenNameFinderModelTest {
File model = Files.createTempFile("nermodel", ".bin").toFile();
- try {
- FileOutputStream modelOut = new FileOutputStream(model);
+ try (FileOutputStream modelOut = new FileOutputStream(model)) {
nameFinderModel.serialize(modelOut);
-
- modelOut.close();
-
Assertions.assertTrue(model.exists());
} finally {
- model.delete();
+ Assertions.assertTrue(model.delete());
FileUtil.deleteDirectory(resourcesFolder.toFile());
}
}