You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/24 15:28:30 UTC
opennlp git commit: OPENNLP-871: Clean up code base for release
Repository: opennlp
Updated Branches:
refs/heads/trunk 1d1c41945 -> 9cb610bcb
OPENNLP-871: Clean up code base for release
This closes #20
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/9cb610bc
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/9cb610bc
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/9cb610bc
Branch: refs/heads/trunk
Commit: 9cb610bcbc3c541a3326d6504056d5526876c4cb
Parents: 1d1c419
Author: smarthi <sm...@apache.org>
Authored: Fri Dec 23 20:52:40 2016 -0500
Committer: Kottmann <jo...@apache.org>
Committed: Sat Dec 24 16:27:49 2016 +0100
----------------------------------------------------------------------
.../brat/BratNameSampleStreamFactory.java | 5 +-
.../ConstitParseSampleStream.java | 6 +-
.../formats/muc/DocumentSplitterStream.java | 2 +-
.../muc/Muc6NameSampleStreamFactory.java | 10 +--
.../tools/formats/muc/MucElementNames.java | 2 +-
.../formats/muc/MucNameContentHandler.java | 12 +--
.../tools/formats/muc/MucNameSampleStream.java | 2 +-
.../opennlp/tools/formats/muc/SgmlParser.java | 2 +-
.../formats/ontonotes/DocumentToLineStream.java | 2 +-
.../ontonotes/OntoNotesNameSampleStream.java | 8 +-
.../OntoNotesNameSampleStreamFactory.java | 20 ++---
.../OntoNotesParseSampleStreamFactory.java | 18 ++---
.../tools/lemmatizer/DictionaryLemmatizer.java | 5 +-
.../opennlp/tools/ml/maxent/GISTrainer.java | 14 ++--
.../tools/ml/model/GenericModelReader.java | 25 ++++---
.../ml/model/OnePassRealValueDataIndexer.java | 6 +-
.../naivebayes/BinaryNaiveBayesModelWriter.java | 2 +-
.../tools/ml/naivebayes/NaiveBayesTrainer.java | 2 +-
.../perceptron/BinaryPerceptronModelWriter.java | 2 +-
.../ml/perceptron/PerceptronModelWriter.java | 8 +-
.../tools/ml/perceptron/PerceptronTrainer.java | 43 ++++-------
.../PlainTextPerceptronModelWriter.java | 6 +-
.../SimplePerceptronSequenceTrainer.java | 47 ++++++------
.../opennlp/tools/namefind/NameFinderME.java | 14 ++--
.../java/opennlp/tools/ngram/NGramModel.java | 11 +--
.../tools/parser/AbstractBottomUpParser.java | 6 +-
.../tools/parser/AbstractContextGenerator.java | 1 -
.../tools/parser/ChunkContextGenerator.java | 2 +-
.../treeinsert/AttachContextGenerator.java | 10 +--
.../treeinsert/BuildContextGenerator.java | 2 +-
.../treeinsert/CheckContextGenerator.java | 2 +-
.../opennlp/tools/parser/treeinsert/Parser.java | 34 +++------
.../parser/treeinsert/ParserEventStream.java | 77 ++++++++++----------
.../java/opennlp/tools/postag/POSModel.java | 8 +-
.../java/opennlp/tools/postag/POSTaggerME.java | 15 ++--
.../opennlp/tools/util/BaseToolFactory.java | 6 +-
.../tools/eval/OntoNotes4NameFinderEval.java | 22 ++----
.../tools/eval/OntoNotes4ParserEval.java | 22 ++----
.../tools/eval/OntoNotes4PosTaggerEval.java | 22 ++----
.../tools/postag/POSTaggerFactoryTest.java | 28 ++++---
40 files changed, 219 insertions(+), 312 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
index b7a8c41..669c699 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
@@ -18,10 +18,7 @@
package opennlp.tools.formats.brat;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.io.InputStream;
-
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
@@ -75,7 +72,7 @@ public class BratNameSampleStreamFactory extends AbstractSampleStreamFactory<Nam
* Checks that none of the passed values are null.
*
* @param objects
- * @return
+ * @return true or false
*/
private boolean notNull(Object... objects) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java
index e908cb1..1964131 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java
@@ -36,7 +36,7 @@ public class ConstitParseSampleStream extends FilterObjectStream<byte[], Parse>
private SAXParser saxParser;
- private List<Parse> parses = new ArrayList<Parse>();
+ private List<Parse> parses = new ArrayList<>();
protected ConstitParseSampleStream(ObjectStream<byte[]> samples) {
super(samples);
@@ -57,12 +57,12 @@ public class ConstitParseSampleStream extends FilterObjectStream<byte[], Parse>
if (xmlbytes != null) {
- List<Parse> producedParses = new ArrayList<Parse>();
+ List<Parse> producedParses = new ArrayList<>();
try {
saxParser.parse(new ByteArrayInputStream(xmlbytes), new ConstitDocumentHandler(producedParses));
} catch (SAXException e) {
//TODO update after Java6 upgrade
- throw (IOException) new IOException(e.getMessage()).initCause(e);
+ throw new IOException(e.getMessage(), e);
}
parses.addAll(producedParses);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java
index 257505d..9ac9514 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java
@@ -30,7 +30,7 @@ class DocumentSplitterStream extends FilterObjectStream<String, String> {
private static final String DOC_START_ELEMENT = "<DOC>";
private static final String DOC_END_ELEMENT = "</DOC>";
- private List<String> docs = new ArrayList<String>();
+ private List<String> docs = new ArrayList<>();
DocumentSplitterStream(ObjectStream<String> samples) {
super(samples);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
index b76613c..496253b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
@@ -18,9 +18,7 @@
package opennlp.tools.formats.muc;
import java.io.File;
-import java.io.FileFilter;
import java.nio.charset.Charset;
-
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.StreamFactoryRegistry;
@@ -55,12 +53,8 @@ public class Muc6NameSampleStreamFactory extends AbstractSampleStreamFactory<Nam
Tokenizer tokenizer = new TokenizerME(tokenizerModel);
ObjectStream<String> mucDocStream = new FileToStringSampleStream(
- new DirectorySampleStream(params.getData(), new FileFilter() {
-
- public boolean accept(File file) {
- return StringUtil.toLowerCase(file.getName()).endsWith(".sgm");
- }
- }, false), Charset.forName("UTF-8"));
+ new DirectorySampleStream(params.getData(),
+ file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), Charset.forName("UTF-8"));
return new MucNameSampleStream(tokenizer, mucDocStream);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java
index 35b499d..6574fd0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java
@@ -32,7 +32,7 @@ class MucElementNames {
static final Set<String> CONTENT_ELEMENTS;
static {
- Set<String> contentElementNames = new HashSet<String>();
+ Set<String> contentElementNames = new HashSet<>();
contentElementNames.add(MucElementNames.HEADLINE_ELEMENT);
contentElementNames.add(MucElementNames.DATELINE_ELEMENT);
contentElementNames.add(MucElementNames.DD_ELEMENT);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
index 7300251..4d6d3a4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
@@ -43,7 +43,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler {
private static final Set<String> EXPECTED_TYPES;
static {
- Set<String> types = new HashSet<String>();
+ Set<String> types = new HashSet<>();
types.add("PERSON");
types.add("ORGANIZATION");
@@ -55,7 +55,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler {
EXPECTED_TYPES = Collections.unmodifiableSet(types);
- Set<String> nameElements = new HashSet<String>();
+ Set<String> nameElements = new HashSet<>();
nameElements.add(ENTITY_ELEMENT_NAME);
nameElements.add(TIME_ELEMENT_NAME);
nameElements.add(NUM_ELEMENT_NAME);
@@ -65,12 +65,12 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler {
private final Tokenizer tokenizer;
private final List<NameSample> storedSamples;
- boolean isInsideContentElement = false;
- private final List<String> text = new ArrayList<String>();
+ private boolean isInsideContentElement = false;
+ private final List<String> text = new ArrayList<>();
private boolean isClearAdaptiveData = false;
- private final Stack<Span> incompleteNames = new Stack<Span>();
+ private final Stack<Span> incompleteNames = new Stack<>();
- private List<Span> names = new ArrayList<Span>();
+ private List<Span> names = new ArrayList<>();
public MucNameContentHandler(Tokenizer tokenizer,
List<NameSample> storedSamples) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java
index 530302d..281df5d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java
@@ -31,7 +31,7 @@ public class MucNameSampleStream extends FilterObjectStream<String, NameSample>
private final Tokenizer tokenizer;
- private List<NameSample> storedSamples = new ArrayList<NameSample>();
+ private List<NameSample> storedSamples = new ArrayList<>();
protected MucNameSampleStream(Tokenizer tokenizer, ObjectStream<String> samples) {
super(samples);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java
index fd18f6f..e85e995 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java
@@ -75,7 +75,7 @@ public class SgmlParser {
// value chars
// " <- end
- Map<String, String> attributes = new HashMap<String, String>();
+ Map<String, String> attributes = new HashMap<>();
StringBuilder key = new StringBuilder();
StringBuilder value = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java
index 4cab6ea..eb8b3e2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java
@@ -41,7 +41,7 @@ public class DocumentToLineStream extends SegmenterObjectStream<String, String>
// documents must be empty line terminated
if (!lines.get(lines.size() - 1).trim().isEmpty()) {
- lines = new ArrayList<String>(lines);
+ lines = new ArrayList<>(lines);
lines.add("");
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
index 770a698..744e134 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
@@ -42,12 +42,12 @@ public class OntoNotesNameSampleStream extends
private final Map<String, String> tokenConversionMap;
- private List<NameSample> nameSamples = new LinkedList<NameSample>();
+ private List<NameSample> nameSamples = new LinkedList<>();
public OntoNotesNameSampleStream(ObjectStream<String> samples) {
super(samples);
- Map<String, String> tokenConversionMap = new HashMap<String, String>();
+ Map<String, String> tokenConversionMap = new HashMap<>();
tokenConversionMap.put("-LRB-", "(");
tokenConversionMap.put("-RRB-", ")");
tokenConversionMap.put("-LSB-", "[");
@@ -107,8 +107,8 @@ public class OntoNotesNameSampleStream extends
String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
- List<Span> entities = new LinkedList<Span>();
- List<String> cleanedTokens = new ArrayList<String>(tokens.length);
+ List<Span> entities = new LinkedList<>();
+ List<String> cleanedTokens = new ArrayList<>(tokens.length);
int tokenIndex = 0;
int entityBeginIndex = -1;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java
index 88b1424..c0dfdbd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java
@@ -18,9 +18,7 @@
package opennlp.tools.formats.ontonotes;
import java.io.File;
-import java.io.FileFilter;
import java.nio.charset.Charset;
-
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.AbstractSampleStreamFactory;
@@ -41,19 +39,15 @@ public class OntoNotesNameSampleStreamFactory extends
OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class);
ObjectStream<File> documentStream = new DirectorySampleStream(new File(
- params.getOntoNotesDir()), new FileFilter() {
-
- public boolean accept(File file) {
- if (file.isFile()) {
- return file.getName().endsWith(".name");
- }
+ params.getOntoNotesDir()), file -> {
+ if (file.isFile()) {
+ return file.getName().endsWith(".name");
+ }
- return file.isDirectory();
- }
- }, true);
+ return file.isDirectory();
+ }, true);
- return new OntoNotesNameSampleStream(new FileToStringSampleStream(
- documentStream, Charset.forName("UTF-8")));
+ return new OntoNotesNameSampleStream(new FileToStringSampleStream(documentStream, Charset.forName("UTF-8")));
}
public static void registerFactory() {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java
index e77edcf..a99bc4e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java
@@ -18,9 +18,7 @@
package opennlp.tools.formats.ontonotes;
import java.io.File;
-import java.io.FileFilter;
import java.nio.charset.Charset;
-
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.AbstractSampleStreamFactory;
@@ -31,7 +29,6 @@ import opennlp.tools.util.ObjectStream;
public class OntoNotesParseSampleStreamFactory extends AbstractSampleStreamFactory<Parse> {
-
protected OntoNotesParseSampleStreamFactory() {
super(OntoNotesFormatParameters.class);
}
@@ -41,16 +38,13 @@ public class OntoNotesParseSampleStreamFactory extends AbstractSampleStreamFacto
OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class);
ObjectStream<File> documentStream = new DirectorySampleStream(new File(
- params.getOntoNotesDir()), new FileFilter() {
-
- public boolean accept(File file) {
- if (file.isFile()) {
- return file.getName().endsWith(".parse");
- }
+ params.getOntoNotesDir()), file -> {
+ if (file.isFile()) {
+ return file.getName().endsWith(".parse");
+ }
- return file.isDirectory();
- }
- }, true);
+ return file.isDirectory();
+ }, true);
// We need file to line here ... and that is probably best done with the plain text stream
// let's copy it over here, refactor it, and then at some point we replace the current version
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index 683b97c..6d8b51d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
/**
* Lemmatize by simple dictionary lookup into a hashmap built from a file
@@ -36,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
/**
* The hashmap containing the dictionary.
*/
- private final HashMap<List<String>, String> dictMap;
+ private final Map<List<String>, String> dictMap;
/**
* Construct a hashmap from the input tab separated dictionary.
@@ -66,7 +67,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
*
* @return dictMap the Map
*/
- public HashMap<List<String>, String> getDictMap() {
+ public Map<List<String>, String> getDictMap() {
return this.dictMap;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
index 05a5424..7f087b0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
@@ -20,16 +20,12 @@
package opennlp.tools.ml.maxent;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
import opennlp.tools.ml.model.DataIndexer;
import opennlp.tools.ml.model.EvalParameters;
import opennlp.tools.ml.model.Event;
@@ -256,7 +252,7 @@ class GISTrainer {
modelExpects = new MutableContext[threads][];
- /************** Incorporate all of the needed info ******************/
+ /* Incorporate all of the needed info *****/
display("Incorporating indexed data for training... \n");
contexts = di.getContexts();
values = di.getValues();
@@ -387,7 +383,7 @@ class GISTrainer {
display("...done.\n");
- /***************** Find the parameters ************************/
+ /* Find the parameters *****/
if (threads == 1)
display("Computing model parameters ...\n");
else
@@ -395,7 +391,7 @@ class GISTrainer {
findParameters(iterations, correctionConstant);
- /*************** Create and return the model ******************/
+ /* Create and return the model ****/
// To be compatible with old models the correction constant is always 1
return new GISModel(params, predLabels, outcomeLabels, 1, evalParams.getCorrectionParam());
@@ -405,7 +401,7 @@ class GISTrainer {
private void findParameters(int iterations, double correctionConstant) {
int threads=modelExpects.length;
ExecutorService executor = Executors.newFixedThreadPool(threads);
- CompletionService<ModelExpactationComputeTask> completionService=new ExecutorCompletionService<GISTrainer.ModelExpactationComputeTask>(executor);
+ CompletionService<ModelExpactationComputeTask> completionService = new ExecutorCompletionService<>(executor);
double prevLL = 0.0;
double currLL;
display("Performing " + iterations + " iterations.\n");
@@ -571,7 +567,7 @@ class GISTrainer {
}
for (int i=0; i<numberOfThreads; i++) {
- ModelExpactationComputeTask finishedTask = null;
+ ModelExpactationComputeTask finishedTask;
try {
finishedTask = completionService.take().get();
} catch (InterruptedException e) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/model/GenericModelReader.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/GenericModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/GenericModelReader.java
index fc5da33..37c000d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/GenericModelReader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/GenericModelReader.java
@@ -41,20 +41,21 @@ public class GenericModelReader extends AbstractModelReader {
public void checkModelType() throws IOException {
String modelType = readUTF();
- if (modelType.equals("Perceptron")) {
- delegateModelReader = new PerceptronModelReader(this.dataReader);
- }
- else if (modelType.equals("GIS")) {
- delegateModelReader = new GISModelReader(this.dataReader);
- }
- else if (modelType.equals("QN")) {
+ switch (modelType) {
+ case "Perceptron":
+ delegateModelReader = new PerceptronModelReader(this.dataReader);
+ break;
+ case "GIS":
+ delegateModelReader = new GISModelReader(this.dataReader);
+ break;
+ case "QN":
delegateModelReader = new QNModelReader(this.dataReader);
- }
- else if (modelType.equals("NaiveBayes")) {
+ break;
+ case "NaiveBayes":
delegateModelReader = new NaiveBayesModelReader(this.dataReader);
- }
- else {
- throw new IOException("Unknown model format: "+modelType);
+ break;
+ default:
+ throw new IOException("Unknown model format: " + modelType);
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java
index 8bb05d2..272f608 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java
@@ -73,12 +73,12 @@ public class OnePassRealValueDataIndexer extends OnePassDataIndexer {
}
protected List<ComparableEvent> index(LinkedList<Event> events, Map<String,Integer> predicateIndex) {
- Map<String,Integer> omap = new HashMap<String,Integer>();
+ Map<String,Integer> omap = new HashMap<>();
int numEvents = events.size();
int outcomeCount = 0;
- List<ComparableEvent> eventsToCompare = new ArrayList<ComparableEvent>(numEvents);
- List<Integer> indexedContext = new ArrayList<Integer>();
+ List<ComparableEvent> eventsToCompare = new ArrayList<>(numEvents);
+ List<Integer> indexedContext = new ArrayList<>();
for (int eventIndex=0; eventIndex<numEvents; eventIndex++) {
Event ev = events.removeFirst();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/BinaryNaiveBayesModelWriter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/BinaryNaiveBayesModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/BinaryNaiveBayesModelWriter.java
index f91d640..a00e8e5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/BinaryNaiveBayesModelWriter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/BinaryNaiveBayesModelWriter.java
@@ -31,7 +31,7 @@ import opennlp.tools.ml.model.AbstractModel;
* Model writer that saves models in binary format.
*/
public class BinaryNaiveBayesModelWriter extends NaiveBayesModelWriter {
- DataOutputStream output;
+ private DataOutputStream output;
/**
* Constructor which takes a NaiveBayesModel and a File and prepares itself to
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java
index b39b659..7547630 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java
@@ -137,7 +137,7 @@ public class NaiveBayesTrainer extends AbstractEventTrainer {
display("...done.\n");
- /*************** Create and return the model ******************/
+ /* Create and return the model ****/
return new NaiveBayesModel(finalParameters, predLabels, outcomeLabels);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/BinaryPerceptronModelWriter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/BinaryPerceptronModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/BinaryPerceptronModelWriter.java
index df13efe..8d0085b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/BinaryPerceptronModelWriter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/BinaryPerceptronModelWriter.java
@@ -31,7 +31,7 @@ import opennlp.tools.ml.model.AbstractModel;
* Model writer that saves models in binary format.
*/
public class BinaryPerceptronModelWriter extends PerceptronModelWriter {
- DataOutputStream output;
+ private DataOutputStream output;
/**
* Constructor which takes a GISModel and a File and prepares itself to
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java
index 274e7df..e958c3c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java
@@ -40,7 +40,7 @@ public abstract class PerceptronModelWriter extends AbstractModelWriter {
protected Context[] PARAMS;
protected String[] OUTCOME_LABELS;
protected String[] PRED_LABELS;
- int numOutcomes;
+ private int numOutcomes;
public PerceptronModelWriter (AbstractModel model) {
@@ -99,15 +99,15 @@ public abstract class PerceptronModelWriter extends AbstractModelWriter {
protected List<List<ComparablePredicate>> computeOutcomePatterns(ComparablePredicate[] sorted) {
ComparablePredicate cp = sorted[0];
- List<List<ComparablePredicate>> outcomePatterns = new ArrayList<List<ComparablePredicate>>();
- List<ComparablePredicate> newGroup = new ArrayList<ComparablePredicate>();
+ List<List<ComparablePredicate>> outcomePatterns = new ArrayList<>();
+ List<ComparablePredicate> newGroup = new ArrayList<>();
for (ComparablePredicate predicate : sorted) {
if (cp.compareTo(predicate) == 0) {
newGroup.add(predicate);
} else {
cp = predicate;
outcomePatterns.add(newGroup);
- newGroup = new ArrayList<ComparablePredicate>();
+ newGroup = new ArrayList<>();
newGroup.add(predicate);
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java
index d90d856..68883fa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java
@@ -86,12 +86,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
public boolean isValid() {
String algorithmName = getAlgorithm();
-
- if (algorithmName != null && !(PERCEPTRON_VALUE.equals(algorithmName))) {
- return false;
- }
-
- return true;
+ return !(algorithmName != null && !(PERCEPTRON_VALUE.equals(algorithmName)));
}
public boolean isSortAndMerge() {
@@ -217,7 +212,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
display("...done.\n");
- /*************** Create and return the model ******************/
+ /* Create and return the model *************/
return new PerceptronModel(finalParameters, predLabels, outcomeLabels);
}
@@ -229,7 +224,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
for (int oi = 0; oi < numOutcomes; oi++)
allOutcomesPattern[oi] = oi;
- /** Stores the estimated parameter value of each predicate during iteration. */
+ /* Stores the estimated parameter value of each predicate during iteration. */
MutableContext[] params = new MutableContext[numPreds];
for (int pi = 0; pi < numPreds; pi++) {
params[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]);
@@ -239,7 +234,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
EvalParameters evalParams = new EvalParameters(params,numOutcomes);
- /** Stores the sum of parameter values of each predicate over many iterations. */
+ /* Stores the sum of parameter values of each predicate over many iterations. */
MutableContext[] summedParams = new MutableContext[numPreds];
if (useAverage) {
for (int pi = 0; pi < numPreds; pi++) {
@@ -273,7 +268,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
for (int ei = 0; ei < numUniqueEvents; ei++) {
int targetOutcome = outcomeList[ei];
- for (int ni=0; ni<this.numTimesEventsSeen[ei]; ni++) {
+ for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++) {
// Compute the model's prediction according to the current parameters.
double[] modelDistribution = new double[numOutcomes];
@@ -295,8 +290,8 @@ public class PerceptronTrainer extends AbstractEventTrainer {
params[pi].updateParameter(targetOutcome, stepsize);
params[pi].updateParameter(maxOutcome, -stepsize);
} else {
- params[pi].updateParameter(targetOutcome, stepsize*values[ei][ci]);
- params[pi].updateParameter(maxOutcome, -stepsize*values[ei][ci]);
+ params[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]);
+ params[pi].updateParameter(maxOutcome, -stepsize * values[ei][ci]);
}
}
}
@@ -309,36 +304,28 @@ public class PerceptronTrainer extends AbstractEventTrainer {
// Calculate the training accuracy and display.
double trainingAccuracy = (double) numCorrect / numEvents;
- if (i < 10 || (i%10) == 0)
- display(". (" + numCorrect + "/" + numEvents+") " + trainingAccuracy + "\n");
+ if (i < 10 || (i % 10) == 0)
+ display(". (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
// TODO: Make averaging configurable !!!
boolean doAveraging;
- if (useAverage && useSkippedlAveraging && (i < 20 || isPerfectSquare(i))) {
- doAveraging = true;
- }
- else if (useAverage) {
- doAveraging = true;
- }
- else {
- doAveraging = false;
- }
+ doAveraging = useAverage && useSkippedlAveraging && (i < 20 || isPerfectSquare(i)) || useAverage;
if (doAveraging) {
numTimesSummed++;
for (int pi = 0; pi < numPreds; pi++)
- for (int aoi=0;aoi<numOutcomes;aoi++)
+ for (int aoi = 0; aoi < numOutcomes; aoi++)
summedParams[pi].updateParameter(aoi, params[pi].getParameters()[aoi]);
}
// If the tolerance is greater than the difference between the
// current training accuracy and all of the previous three
// training accuracies, stop training.
- if (Math.abs(prevAccuracy1-trainingAccuracy) < tolerance
- && Math.abs(prevAccuracy2-trainingAccuracy) < tolerance
- && Math.abs(prevAccuracy3-trainingAccuracy) < tolerance) {
+ if (Math.abs(prevAccuracy1 - trainingAccuracy) < tolerance
+ && Math.abs(prevAccuracy2 - trainingAccuracy) < tolerance
+ && Math.abs(prevAccuracy3 - trainingAccuracy) < tolerance) {
display("Stopping: change in training set accuracy less than " + tolerance + "\n");
break;
}
@@ -419,7 +406,7 @@ public class PerceptronTrainer extends AbstractEventTrainer {
// See whether a number is a perfect square. Inefficient, but fine
// for our purposes.
- private final static boolean isPerfectSquare (int n) {
+ private static boolean isPerfectSquare (int n) {
int root = (int)Math.sqrt(n);
return root*root == n;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java
index 0322cc5..ffde6c6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java
@@ -21,20 +21,18 @@ package opennlp.tools.ml.perceptron;
import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.zip.GZIPOutputStream;
-
import opennlp.tools.ml.model.AbstractModel;
/**
* Model writer that saves models in plain text format.
*/
public class PlainTextPerceptronModelWriter extends PerceptronModelWriter {
- BufferedWriter output;
+ private BufferedWriter output;
/**
* Constructor which takes a PerceptronModel and a File and prepares itself to
@@ -45,7 +43,7 @@ public class PlainTextPerceptronModelWriter extends PerceptronModelWriter {
* @param f The File in which the model is to be persisted.
*/
public PlainTextPerceptronModelWriter (AbstractModel model, File f)
- throws IOException, FileNotFoundException {
+ throws IOException {
super(model);
if (f.getName().endsWith(".gz")) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
index 43537c5..f14ebf1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
@@ -20,9 +20,10 @@
package opennlp.tools.ml.perceptron;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-
import opennlp.tools.ml.AbstractEventModelSequenceTrainer;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.DataIndexer;
@@ -61,7 +62,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
private String[] outcomeLabels;
- double[] modelDistribution;
+ private double[] modelDistribution;
/** Stores the average parameter values of each predicate during iteration. */
private MutableContext[] averageParams;
@@ -75,13 +76,13 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
private MutableContext[] params;
private boolean useAverage;
private int[][][] updates;
- private int VALUE = 0;
- private int ITER = 1;
- private int EVENT = 2;
+ private static final int VALUE = 0;
+ private static final int ITER = 1;
+ private static final int EVENT = 2;
private int[] allOutcomesPattern;
private String[] predLabels;
- int numSequences;
+ private int numSequences;
public SimplePerceptronSequenceTrainer() {
}
@@ -94,12 +95,8 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
String algorithmName = getAlgorithm();
- if (algorithmName != null
- && !(PERCEPTRON_SEQUENCE_VALUE.equals(algorithmName))) {
- return false;
- }
-
- return true;
+ return !(algorithmName != null
+ && !(PERCEPTRON_SEQUENCE_VALUE.equals(algorithmName)));
}
public AbstractModel doTrain(SequenceStream events) throws IOException {
@@ -127,7 +124,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
outcomeList = di.getOutcomeList();
predLabels = di.getPredLabels();
- pmap = new HashMap<String, Integer>();
+ pmap = new HashMap<>();
for (int i = 0; i < predLabels.length; i++) {
pmap.put(predLabels[i], i);
@@ -139,7 +136,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
this.iterations = iterations;
outcomeLabels = di.getOutcomeLabels();
- omap = new HashMap<String,Integer>();
+ omap = new HashMap<>();
for (int oli=0;oli<outcomeLabels.length;oli++) {
omap.put(outcomeLabels[oli], oli);
}
@@ -180,7 +177,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
findParameters(iterations);
display("...done.\n");
- /*************** Create and return the model ******************/
+ /* Create and return the model ****/
String[] updatedPredLabels = predLabels;
/*
String[] updatedPredLabels = new String[pmap.size()];
@@ -225,9 +222,9 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
int numCorrect = 0;
int oei=0;
int si=0;
- Map<String,Float>[] featureCounts = new Map[numOutcomes];
+ List<Map<String,Float>> featureCounts = new ArrayList<>(numOutcomes);
for (int oi=0;oi<numOutcomes;oi++) {
- featureCounts[oi] = new HashMap<String,Float>();
+ featureCounts.add(new HashMap<>());
}
PerceptronModel model = new PerceptronModel(params,predLabels,pmap,outcomeLabels);
@@ -249,7 +246,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
}
if (update) {
for (int oi=0;oi<numOutcomes;oi++) {
- featureCounts[oi].clear();
+ featureCounts.get(oi).clear();
}
//System.err.print("train:");for (int ei=0;ei<events.length;ei++) {System.err.print(" "+events[ei].getOutcome());} System.err.println();
//training feature count computation
@@ -262,14 +259,14 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
if (values != null) {
value = values[ci];
}
- Float c = featureCounts[oi].get(contextStrings[ci]);
+ Float c = featureCounts.get(oi).get(contextStrings[ci]);
if (c == null) {
c = value;
}
else {
c+=value;
}
- featureCounts[oi].put(contextStrings[ci], c);
+ featureCounts.get(oi).put(contextStrings[ci], c);
}
}
//evaluation feature count computation
@@ -283,7 +280,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
if (values != null) {
value = values[ci];
}
- Float c = featureCounts[oi].get(contextStrings[ci]);
+ Float c = featureCounts.get(oi).get(contextStrings[ci]);
if (c == null) {
c = -1*value;
}
@@ -291,19 +288,19 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
c-=value;
}
if (c == 0f) {
- featureCounts[oi].remove(contextStrings[ci]);
+ featureCounts.get(oi).remove(contextStrings[ci]);
}
else {
- featureCounts[oi].put(contextStrings[ci], c);
+ featureCounts.get(oi).put(contextStrings[ci], c);
}
}
}
for (int oi=0;oi<numOutcomes;oi++) {
- for (String feature : featureCounts[oi].keySet()) {
+ for (String feature : featureCounts.get(oi).keySet()) {
int pi = pmap.get(feature);
if (pi != -1) {
//System.err.println(si+" "+outcomeLabels[oi]+" "+feature+" "+featureCounts[oi].get(feature));
- params[pi].updateParameter(oi, featureCounts[oi].get(feature));
+ params[pi].updateParameter(oi, featureCounts.get(oi).get(feature));
if (useAverage) {
if (updates[pi][oi][VALUE] != 0) {
averageParams[pi].updateParameter(oi,updates[pi][oi][VALUE]*(numSequences*(iteration-updates[pi][oi][ITER])+(si-updates[pi][oi][EVENT])));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 00e001c..ff8c143 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -45,7 +45,6 @@ import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
import opennlp.tools.util.featuregen.BigramNameFeatureGenerator;
import opennlp.tools.util.featuregen.CachedFeatureGenerator;
-import opennlp.tools.util.featuregen.FeatureGeneratorResourceProvider;
import opennlp.tools.util.featuregen.GeneratorFactory;
import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator;
import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator;
@@ -114,14 +113,11 @@ public class NameFinderME implements TokenNameFinder {
if (generatorDescriptor != null) {
featureGenerator = GeneratorFactory.create(new ByteArrayInputStream(
- generatorDescriptor), new FeatureGeneratorResourceProvider() {
-
- public Object getResource(String key) {
- if (resources != null) {
- return resources.get(key);
- }
- return null;
+ generatorDescriptor), key -> {
+ if (resources != null) {
+ return resources.get(key);
}
+ return null;
});
} else {
featureGenerator = null;
@@ -440,7 +436,7 @@ public class NameFinderME implements TokenNameFinder {
*/
public static Span[] dropOverlappingSpans(Span spans[]) {
- List<Span> sortedSpans = new ArrayList<Span>(spans.length);
+ List<Span> sortedSpans = new ArrayList<>(spans.length);
Collections.addAll(sortedSpans, spans);
Collections.sort(sortedSpans);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
index 0e597e0..6d912da 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
@@ -25,12 +25,10 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.dictionary.serializer.Attributes;
import opennlp.tools.dictionary.serializer.DictionarySerializer;
import opennlp.tools.dictionary.serializer.Entry;
-import opennlp.tools.dictionary.serializer.EntryInserter;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
import opennlp.tools.util.StringUtil;
@@ -57,11 +55,9 @@ public class NGramModel implements Iterable<StringList>{
*
* @param in the serialized model stream
* @throws IOException
- * @throws InvalidFormatException
*/
- public NGramModel(InputStream in) throws IOException, InvalidFormatException {
- DictionarySerializer.create(in, new EntryInserter() {
- public void insert(Entry entry) throws InvalidFormatException {
+ public NGramModel(InputStream in) throws IOException {
+ DictionarySerializer.create(in, entry -> {
int count;
String countValueString = null;
@@ -82,8 +78,7 @@ public class NGramModel implements Iterable<StringList>{
add(entry.getTokens());
setCount(entry.getTokens(), count);
- }
- });
+ });
}
/**
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
index ff4bea8..cc8eab5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
@@ -349,7 +349,7 @@ public abstract class AbstractBottomUpParser implements Parser {
return new Parse[] {completeParses.first()};
}
else {
- List<Parse> topParses = new ArrayList<Parse>(numParses);
+ List<Parse> topParses = new ArrayList<>(numParses);
while(!completeParses.isEmpty() && topParses.size() < numParses) {
Parse tp = completeParses.extract();
topParses.add(tp);
@@ -460,9 +460,6 @@ public abstract class AbstractBottomUpParser implements Parser {
words[i] = children[i].getCoveredText();
}
Sequence[] ts = tagger.topKSequences(words);
-// if (ts.length == 0) {
-// System.err.println("no tag sequence");
-// }
Parse[] newParses = new Parse[ts.length];
for (int i = 0; i < ts.length; i++) {
String[] tags = ts[i].getOutcomes().toArray(new String[words.length]);
@@ -475,7 +472,6 @@ public abstract class AbstractBottomUpParser implements Parser {
double prob = probs[j];
newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j));
newParses[i].addProb(Math.log(prob));
- //newParses[i].show();
}
}
return newParses;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
index 09d3b0f..85de098 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
@@ -118,7 +118,6 @@ public abstract class AbstractContextGenerator {
if (punct1s != null) {
for (Iterator<Parse> pi = punct1s.iterator();pi.hasNext();) {
Parse p = pi.next();
-// String punct = punct(p,c1.index);
String punctbo = punctbo(p,c1.index <= 0 ? c1.index -1 : c1.index);
//punctbo(1);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index d3d8303..b3de9b2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -115,7 +115,7 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
String cacheKey = i +t_2+t1+t0+t1+t2+p_2+p_1;
if (contextsCache!= null) {
if (wordsKey == words) {
- String[] contexts = (String[]) contextsCache.get(cacheKey);
+ String[] contexts = contextsCache.get(cacheKey);
if (contexts != null) {
return contexts;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
index 9bce4cc..93b23ad 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
@@ -60,8 +60,7 @@ public class AttachContextGenerator extends AbstractContextGenerator {
* @return A set of contextual features about this attachment.
*/
public String[] getContext(Parse[] constituents, int index, List<Parse> rightFrontier, int rfi) {
- List<String> features = new ArrayList<String>(100);
- int nodeDistance = rfi;
+ List<String> features = new ArrayList<>(100);
Parse fn = rightFrontier.get(rfi);
Parse fp = null;
if (rfi+1 < rightFrontier.size()) {
@@ -147,9 +146,9 @@ public class AttachContextGenerator extends AbstractContextGenerator {
*/
int headDistance = (p0.getHeadIndex()-fn.getHeadIndex());
features.add("hd="+headDistance);
- features.add("nd="+nodeDistance);
+ features.add("nd="+ rfi);
- features.add("nd="+p0.getType()+"."+nodeDistance);
+ features.add("nd="+p0.getType()+"."+ rfi);
features.add("hd="+p0.getType()+"."+headDistance);
//features.add("fs="+rightFrontier.size());
//paired punct features
@@ -158,9 +157,6 @@ public class AttachContextGenerator extends AbstractContextGenerator {
features.add("quotematch");//? not generating feature correctly
}
- else {
- //features.add("noquotematch");
- }
}
return features.toArray(new String[features.size()]);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
index 8677171..589a1dc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
@@ -115,7 +115,7 @@ public class BuildContextGenerator extends AbstractContextGenerator {
Cons c1 = new Cons(consp1,consbop1,1,true);
Cons c2 = new Cons(consp2,consbop2,2,true);
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
features.add("default");
//unigrams
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
index 694d65d..de6824d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
@@ -42,7 +42,7 @@ public class CheckContextGenerator extends AbstractContextGenerator {
}
public String[] getContext(Parse parent, Parse[] constituents, int index, boolean trimFrontier) {
- List<String> features = new ArrayList<String>(100);
+ List<String> features = new ArrayList<>(100);
//default
features.add("default");
Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
index e2c62d7..85d6cf9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
@@ -140,7 +140,7 @@ public class Parser extends AbstractBottomUpParser {
* @return The right frontier of the specified parse tree.
*/
public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
- List<Parse> rf = new LinkedList<Parse>();
+ List<Parse> rf = new LinkedList<>();
Parse top;
if (AbstractBottomUpParser.TOP_NODE.equals(root.getType()) ||
AbstractBottomUpParser.INC_NODE.equals(root.getType())) {
@@ -154,7 +154,7 @@ public class Parser extends AbstractBottomUpParser {
Parse[] kids = top.getChildren();
top = kids[kids.length-1];
}
- return new ArrayList<Parse>(rf);
+ return new ArrayList<>(rf);
}
private void setBuilt(Parse p) {
@@ -193,22 +193,12 @@ public class Parser extends AbstractBottomUpParser {
private boolean isBuilt(Parse p) {
String l = p.getLabel();
- if (l == null) {
- return false;
- }
- else {
- return l.startsWith(Parser.BUILT);
- }
+ return l != null && l.startsWith(Parser.BUILT);
}
private boolean isComplete(Parse p) {
String l = p.getLabel();
- if (l == null) {
- return false;
- }
- else {
- return l.endsWith(Parser.COMPLETE);
- }
+ return l != null && l.endsWith(Parser.COMPLETE);
}
@Override
@@ -226,9 +216,9 @@ public class Parser extends AbstractBottomUpParser {
@Override
protected Parse[] advanceParses(Parse p, double probMass) {
double q = 1 - probMass;
- /** The index of the node which will be labeled in this iteration of advancing the parse. */
+ /* The index of the node which will be labeled in this iteration of advancing the parse. */
int advanceNodeIndex;
- /** The node which will be labeled in this iteration of advancing the parse. */
+ /* The node which will be labeled in this iteration of advancing the parse. */
Parse advanceNode=null;
Parse[] originalChildren = p.getChildren();
Parse[] children = collapsePunctuation(originalChildren,punctSet);
@@ -254,7 +244,7 @@ public class Parser extends AbstractBottomUpParser {
}
int originalZeroIndex = mapParseIndex(0,children,originalChildren);
int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren);
- List<Parse> newParsesList = new ArrayList<Parse>();
+ List<Parse> newParsesList = new ArrayList<>();
//call build model
buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs);
double doneProb = bprobs[doneIndex];
@@ -262,7 +252,7 @@ public class Parser extends AbstractBottomUpParser {
if (1-doneProb > q) {
double bprobSum = 0;
while (bprobSum < probMass) {
- /** The largest unadvanced labeling. */
+ /* The largest unadvanced labeling. */
int max = 0;
for (int pi = 1; pi < bprobs.length; pi++) { //for each build outcome
if (bprobs[pi] > bprobs[max]) {
@@ -434,7 +424,7 @@ public class Parser extends AbstractBottomUpParser {
ObjectStream<Parse> parseSamples, HeadRules rules, TrainingParameters mlParams)
throws IOException {
- Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+ Map<String, String> manifestInfoEntries = new HashMap<>();
System.err.println("Building dictionary");
Dictionary mdict = buildDictionary(parseSamples, rules, mlParams);
@@ -457,7 +447,7 @@ public class Parser extends AbstractBottomUpParser {
System.err.println("Training builder");
ObjectStream<Event> bes = new ParserEventStream(parseSamples, rules,
ParserEventTypeEnum.BUILD, mdict);
- Map<String, String> buildReportMap = new HashMap<String, String>();
+ Map<String, String> buildReportMap = new HashMap<>();
EventTrainer buildTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap);
MaxentModel buildModel = buildTrainer.train(bes);
@@ -469,7 +459,7 @@ public class Parser extends AbstractBottomUpParser {
System.err.println("Training checker");
ObjectStream<Event> kes = new ParserEventStream(parseSamples, rules,
ParserEventTypeEnum.CHECK);
- Map<String, String> checkReportMap = new HashMap<String, String>();
+ Map<String, String> checkReportMap = new HashMap<>();
EventTrainer checkTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("check"), checkReportMap);
MaxentModel checkModel = checkTrainer.train(kes);
@@ -481,7 +471,7 @@ public class Parser extends AbstractBottomUpParser {
System.err.println("Training attacher");
ObjectStream<Event> attachEvents = new ParserEventStream(parseSamples, rules,
ParserEventTypeEnum.ATTACH);
- Map<String, String> attachReportMap = new HashMap<String, String>();
+ Map<String, String> attachReportMap = new HashMap<>();
EventTrainer attachTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("attach"), attachReportMap);
MaxentModel attachModel = attachTrainer.train(attachEvents);
opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
index c693ef6..5949414 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
@@ -25,7 +25,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-
import opennlp.tools.cmdline.SystemInputStreamFactory;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
@@ -37,7 +36,6 @@ import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParseSampleStream;
import opennlp.tools.parser.ParserEventTypeEnum;
-import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -73,7 +71,7 @@ public class ParserEventStream extends AbstractParserEventStream {
* @return a set of parent nodes.
*/
private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
- Map<Parse, Integer> parents = new HashMap<Parse, Integer>();
+ Map<Parse, Integer> parents = new HashMap<>();
Parse parent = node.getParent();
int index = indexOf(node,parent);
parents.put(parent, index);
@@ -130,11 +128,11 @@ public class ParserEventStream extends AbstractParserEventStream {
@Override
protected void addParseEvents(List<Event> parseEvents, Parse[] chunks) {
- /** Frontier nodes built from node in a completed parse. Specifically,
+ /* Frontier nodes built from node in a completed parse. Specifically,
* they have all their children regardless of the stage of parsing.*/
- List<Parse> rightFrontier = new ArrayList<Parse>();
- List<Parse> builtNodes = new ArrayList<Parse>();
- /** Nodes which characterize what the parse looks like to the parser as its being built.
+ List<Parse> rightFrontier = new ArrayList<>();
+ List<Parse> builtNodes = new ArrayList<>();
+ /* Nodes which characterize what the parse looks like to the parser as its being built.
* Specifically, these nodes don't have all their children attached like the parents of
* the chunk nodes do.*/
Parse[] currentChunks = new Parse[chunks.length];
@@ -200,7 +198,7 @@ public class ParserEventStream extends AbstractParserEventStream {
}
//attach node
String attachType = null;
- /** Node selected for attachment. */
+ /* Node selected for attachment. */
Parse attachNode = null;
int attachNodeIndex = -1;
if (ci == 0){
@@ -208,7 +206,7 @@ public class ParserEventStream extends AbstractParserEventStream {
top.insert(currentChunks[ci]);
}
else {
- /** Right frontier consisting of partially-built nodes based on current state of the parse.*/
+ /* Right frontier consisting of partially-built nodes based on current state of the parse.*/
List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
if (currentRightFrontier.size() != rightFrontier.size()) {
System.err.println("fontiers mis-aligned: "+currentRightFrontier.size()+" != "+rightFrontier.size()+" "+currentRightFrontier+" "+rightFrontier);
@@ -333,7 +331,7 @@ public class ParserEventStream extends AbstractParserEventStream {
}
}
- public static void main(String[] args) throws java.io.IOException, InvalidFormatException {
+ public static void main(String[] args) throws java.io.IOException {
if (args.length == 0) {
System.err.println("Usage ParserEventStream -[tag|chunk|build|attach] [-fun] [-dict dictionary] [-model model] head_rules < parses");
System.exit(1);
@@ -345,35 +343,36 @@ public class ParserEventStream extends AbstractParserEventStream {
AbstractModel model = null;
while (ai < args.length && args[ai].startsWith("-")) {
- if (args[ai].equals("-build")) {
- etype = ParserEventTypeEnum.BUILD;
- }
- else if (args[ai].equals("-attach")) {
- etype = ParserEventTypeEnum.ATTACH;
- }
- else if (args[ai].equals("-chunk")) {
- etype = ParserEventTypeEnum.CHUNK;
- }
- else if (args[ai].equals("-check")) {
- etype = ParserEventTypeEnum.CHECK;
- }
- else if (args[ai].equals("-tag")) {
- etype = ParserEventTypeEnum.TAG;
- }
- else if (args[ai].equals("-fun")) {
- fun = true;
- }
- else if (args[ai].equals("-dict")) {
- ai++;
- dict = new Dictionary(new FileInputStream(args[ai]));
- }
- else if (args[ai].equals("-model")) {
- ai++;
- model = (new SuffixSensitiveGISModelReader(new File(args[ai]))).getModel();
- }
- else {
- System.err.println("Invalid option " + args[ai]);
- System.exit(1);
+ switch (args[ai]) {
+ case "-build":
+ etype = ParserEventTypeEnum.BUILD;
+ break;
+ case "-attach":
+ etype = ParserEventTypeEnum.ATTACH;
+ break;
+ case "-chunk":
+ etype = ParserEventTypeEnum.CHUNK;
+ break;
+ case "-check":
+ etype = ParserEventTypeEnum.CHECK;
+ break;
+ case "-tag":
+ etype = ParserEventTypeEnum.TAG;
+ break;
+ case "-fun":
+ fun = true;
+ break;
+ case "-dict":
+ ai++;
+ dict = new Dictionary(new FileInputStream(args[ai]));
+ break;
+ case "-model":
+ ai++;
+ model = (new SuffixSensitiveGISModelReader(new File(args[ai]))).getModel();
+ break;
+ default:
+ System.err.println("Invalid option " + args[ai]);
+ System.exit(1);
}
ai++;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
index 34b9f79..21522a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
@@ -43,7 +43,7 @@ public final class POSModel extends BaseModel {
private static final String COMPONENT_NAME = "POSTaggerME";
- public static final String POS_MODEL_ENTRY_NAME = "pos.model";
+ static final String POS_MODEL_ENTRY_NAME = "pos.model";
public POSModel(String languageCode, SequenceClassificationModel<String> posModel,
Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
@@ -78,15 +78,15 @@ public final class POSModel extends BaseModel {
checkArtifactMap();
}
- public POSModel(InputStream in) throws IOException, InvalidFormatException {
+ public POSModel(InputStream in) throws IOException {
super(COMPONENT_NAME, in);
}
- public POSModel(File modelFile) throws IOException, InvalidFormatException {
+ public POSModel(File modelFile) throws IOException {
super(COMPONENT_NAME, modelFile);
}
- public POSModel(URL modelURL) throws IOException, InvalidFormatException {
+ public POSModel(URL modelURL) throws IOException {
super(COMPONENT_NAME, modelURL);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
index e4c1c1b..366a91a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
@@ -24,9 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.StringTokenizer;
import java.util.concurrent.atomic.AtomicInteger;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.EventModelSequenceTrainer;
@@ -45,7 +43,6 @@ import opennlp.tools.util.StringList;
import opennlp.tools.util.StringUtil;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.StringPattern;
-import opennlp.tools.util.model.ModelType;
/**
* A part-of-speech tagger that uses maximum entropy. Tries to predict whether
@@ -117,7 +114,7 @@ public class POSTaggerME implements POSTagger {
this.model = model.getPosSequenceModel();
}
else {
- this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+ this.model = new opennlp.tools.ml.BeamSearch<>(beamSize,
model.getPosModel(), 0);
}
@@ -127,7 +124,7 @@ public class POSTaggerME implements POSTagger {
* Retrieves an array of all possible part-of-speech tags from the
* tagger.
*
- * @return
+ * @return String[]
*/
public String[] getAllPosTags() {
return model.getOutcomes();
@@ -237,7 +234,7 @@ public class POSTaggerME implements POSTagger {
POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator();
- Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+ Map<String, String> manifestInfoEntries = new HashMap<>();
TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings());
@@ -302,7 +299,7 @@ public class POSTaggerME implements POSTagger {
// the data structure will store the word, the tag, and the number of
// occurrences
- Map<String, Map<String, AtomicInteger>> newEntries = new HashMap<String, Map<String, AtomicInteger>>();
+ Map<String, Map<String, AtomicInteger>> newEntries = new HashMap<>();
POSSample sample;
while ((sample = samples.read()) != null) {
String[] words = sample.getSentence();
@@ -319,7 +316,7 @@ public class POSTaggerME implements POSTagger {
}
if (!newEntries.containsKey(word)) {
- newEntries.put(word, new HashMap<String, AtomicInteger>());
+ newEntries.put(word, new HashMap<>());
}
String[] dictTags = dict.getTags(word);
@@ -346,7 +343,7 @@ public class POSTaggerME implements POSTagger {
// add it to the dictionary
for (Entry<String, Map<String, AtomicInteger>> wordEntry : newEntries
.entrySet()) {
- List<String> tagsForWord = new ArrayList<String>();
+ List<String> tagsForWord = new ArrayList<>();
for (Entry<String, AtomicInteger> entry : wordEntry.getValue().entrySet()) {
if (entry.getValue().get() >= cutoff) {
tagsForWord.add(entry.getKey());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
index 873c2c4..499aa8f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
@@ -63,7 +63,7 @@ public abstract class BaseToolFactory {
*/
@SuppressWarnings("rawtypes")
public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
- return new HashMap<String, ArtifactSerializer>();
+ return new HashMap<>();
}
/**
@@ -75,7 +75,7 @@ public abstract class BaseToolFactory {
* populated by sub-classes.
*/
public Map<String, Object> createArtifactMap() {
- return new HashMap<String, Object>();
+ return new HashMap<>();
}
/**
@@ -84,7 +84,7 @@ public abstract class BaseToolFactory {
* @return the manifest entries to added to the model manifest
*/
public Map<String, String> createManifestEntries() {
- return new HashMap<String, String>();
+ return new HashMap<>();
}
/**
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
index f81cd36..737fae3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
@@ -18,13 +18,8 @@
package opennlp.tools.eval;
import java.io.File;
-import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.Charset;
-
-import org.junit.Assert;
-import org.junit.Test;
-
import opennlp.tools.formats.DirectorySampleStream;
import opennlp.tools.formats.convert.FileToStringSampleStream;
import opennlp.tools.formats.ontonotes.OntoNotesNameSampleStream;
@@ -35,6 +30,8 @@ import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;
+import org.junit.Assert;
+import org.junit.Test;
public class OntoNotes4NameFinderEval {
@@ -42,16 +39,13 @@ public class OntoNotes4NameFinderEval {
throws IOException {
ObjectStream<File> documentStream = new DirectorySampleStream(new File(
- EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
-
- public boolean accept(File file) {
- if (file.isFile()) {
- return file.getName().endsWith(".name");
- }
+ EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> {
+ if (file.isFile()) {
+ return file.getName().endsWith(".name");
+ }
- return file.isDirectory();
- }
- }, true);
+ return file.isDirectory();
+ }, true);
ObjectStream<NameSample> samples = new OntoNotesNameSampleStream(new FileToStringSampleStream(
documentStream, Charset.forName("UTF-8")));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
index 84185e0..087ab7e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
@@ -18,15 +18,10 @@
package opennlp.tools.eval;
import java.io.File;
-import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
-
-import org.junit.Assert;
-import org.junit.Test;
-
import opennlp.tools.formats.DirectorySampleStream;
import opennlp.tools.formats.convert.FileToStringSampleStream;
import opennlp.tools.formats.ontonotes.DocumentToLineStream;
@@ -38,6 +33,8 @@ import opennlp.tools.parser.lang.en.HeadRulesTest;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;
+import org.junit.Assert;
+import org.junit.Test;
public class OntoNotes4ParserEval {
@@ -45,16 +42,13 @@ public class OntoNotes4ParserEval {
throws IOException {
ObjectStream<File> documentStream = new DirectorySampleStream(new File(
- EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
-
- public boolean accept(File file) {
- if (file.isFile()) {
- return file.getName().endsWith(".parse");
- }
+ EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> {
+ if (file.isFile()) {
+ return file.getName().endsWith(".parse");
+ }
- return file.isDirectory();
- }
- }, true);
+ return file.isDirectory();
+ }, true);
OntoNotesParseSampleStream samples = new OntoNotesParseSampleStream(
new DocumentToLineStream(new FileToStringSampleStream(
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
index ca1676a..fb25836 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
@@ -18,13 +18,8 @@
package opennlp.tools.eval;
import java.io.File;
-import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.Charset;
-
-import org.junit.Assert;
-import org.junit.Test;
-
import opennlp.tools.formats.DirectorySampleStream;
import opennlp.tools.formats.convert.FileToStringSampleStream;
import opennlp.tools.formats.convert.ParseToPOSSampleStream;
@@ -35,6 +30,8 @@ import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;
+import org.junit.Assert;
+import org.junit.Test;
public class OntoNotes4PosTaggerEval {
@@ -42,16 +39,13 @@ public class OntoNotes4PosTaggerEval {
throws IOException {
ObjectStream<File> documentStream = new DirectorySampleStream(new File(
- EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() {
-
- public boolean accept(File file) {
- if (file.isFile()) {
- return file.getName().endsWith(".parse");
- }
+ EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> {
+ if (file.isFile()) {
+ return file.getName().endsWith(".parse");
+ }
- return file.isDirectory();
- }
- }, true);
+ return file.isDirectory();
+ }, true);
ParseToPOSSampleStream samples = new ParseToPOSSampleStream(new OntoNotesParseSampleStream(
new DocumentToLineStream(
http://git-wip-us.apache.org/repos/asf/opennlp/blob/9cb610bc/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
index 2f72124..ec76565 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
@@ -18,14 +18,10 @@
package opennlp.tools.postag;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-
-import org.junit.Test;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator;
@@ -37,7 +33,9 @@ import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelType;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
/**
* Tests for the {@link POSTaggerFactory} class.
@@ -53,7 +51,7 @@ public class POSTaggerFactoryTest {
return new WordTagSampleStream(new PlainTextByLineStream(in, UTF_8));
}
- static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory)
+ private static POSModel trainPOSModel(POSTaggerFactory factory)
throws IOException {
return POSTaggerME.train("en", createSampleStream(),
TrainingParameters.defaultParams(), factory);
@@ -66,8 +64,8 @@ public class POSTaggerFactoryTest {
.getResourceAsStream("TagDictionaryCaseSensitive.xml")));
Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
- POSModel posModel = trainPOSModel(ModelType.MAXENT,
- new DummyPOSTaggerFactory(dic, posDict));
+ POSModel posModel = trainPOSModel(
+ new DummyPOSTaggerFactory(dic, posDict));
POSTaggerFactory factory = posModel.getFactory();
assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
@@ -84,7 +82,7 @@ public class POSTaggerFactoryTest {
assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
- assertTrue(factory.getDictionary() instanceof Dictionary);
+ assertTrue(factory.getDictionary() != null);
}
@Test
@@ -93,14 +91,14 @@ public class POSTaggerFactoryTest {
.getResourceAsStream("TagDictionaryCaseSensitive.xml"));
Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
- POSModel posModel = trainPOSModel(ModelType.MAXENT,
- new POSTaggerFactory(dic, posDict));
+ POSModel posModel = trainPOSModel(
+ new POSTaggerFactory(dic, posDict));
POSTaggerFactory factory = posModel.getFactory();
assertTrue(factory.getTagDictionary() instanceof POSDictionary);
- assertTrue(factory.getPOSContextGenerator() instanceof POSContextGenerator);
+ assertTrue(factory.getPOSContextGenerator() != null);
assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
- assertTrue(factory.getDictionary() instanceof Dictionary);
+ assertTrue(factory.getDictionary() != null);
ByteArrayOutputStream out = new ByteArrayOutputStream();
posModel.serialize(out);
@@ -110,9 +108,9 @@ public class POSTaggerFactoryTest {
factory = fromSerialized.getFactory();
assertTrue(factory.getTagDictionary() instanceof POSDictionary);
- assertTrue(factory.getPOSContextGenerator() instanceof POSContextGenerator);
+ assertTrue(factory.getPOSContextGenerator() != null);
assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
- assertTrue(factory.getDictionary() instanceof Dictionary);
+ assertTrue(factory.getDictionary() != null);
}
@Test(expected = InvalidFormatException.class)