You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/23 17:47:18 UTC
[1/7] opennlp git commit: Replace manual array copy [Forced Update!]
Repository: opennlp
Updated Branches:
refs/heads/897 13957112e -> 6d1f82448 (forced update)
Replace manual array copy
See issue OPENNLP-871
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/dd2aaccc
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/dd2aaccc
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/dd2aaccc
Branch: refs/heads/897
Commit: dd2aaccc0737eace57a949f2d7d7291bddb81ee9
Parents: 06c27b3
Author: Joern Kottmann <jo...@apache.org>
Authored: Wed Dec 21 17:12:52 2016 +0100
Committer: Joern Kottmann <jo...@apache.org>
Committed: Wed Dec 21 17:12:52 2016 +0100
----------------------------------------------------------------------
.../src/main/java/opennlp/tools/ml/maxent/GISTrainer.java | 4 +---
.../main/java/opennlp/tools/ml/model/AbstractModelReader.java | 4 +---
.../main/java/opennlp/tools/parser/AbstractBottomUpParser.java | 4 +---
.../java/opennlp/tools/parser/chunking/ParserEventStream.java | 4 +---
4 files changed, 4 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd2aaccc/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
index 9919bb0..05a5424 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java
@@ -361,9 +361,7 @@ class GISTrainer {
}
else {
outcomePattern = new int[numActiveOutcomes];
- for (int aoi=0;aoi<numActiveOutcomes;aoi++) {
- outcomePattern[aoi] = activeOutcomes[aoi];
- }
+ System.arraycopy(activeOutcomes, 0, outcomePattern, 0, numActiveOutcomes);
}
}
params[pi] = new MutableContext(outcomePattern,new double[numActiveOutcomes]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd2aaccc/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
index 6a4b642..6c26214 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
@@ -135,9 +135,7 @@ public abstract class AbstractModelReader {
for (int i=0; i<outcomePatterns.length; i++) {
//construct outcome pattern
int[] outcomePattern = new int[outcomePatterns[i].length-1];
- for (int k=1; k<outcomePatterns[i].length; k++) {
- outcomePattern[k-1] = outcomePatterns[i][k];
- }
+ System.arraycopy(outcomePatterns[i], 1, outcomePattern, 0, outcomePatterns[i].length - 1);
//System.err.println("outcomePattern "+i+" of "+outcomePatterns.length+" with "+outcomePatterns[i].length+" outcomes ");
//populate parameters for each context which uses this outcome pattern.
for (int j=0; j<outcomePatterns[i][0]; j++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd2aaccc/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
index 10c3f0e..ff4bea8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
@@ -576,9 +576,7 @@ public abstract class AbstractBottomUpParser implements Parser {
if (ci+2 < chunks.length) window[wi++] = chunks[ci+2].getHead().getCoveredText();
if (wi < 5) {
String[] subWindow = new String[wi];
- for (int swi=0;swi<wi;swi++) {
- subWindow[swi]=window[swi];
- }
+ System.arraycopy(window, 0, subWindow, 0, wi);
window = subWindow;
}
if (window.length >=3) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd2aaccc/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
index afaed99..8ded9ec 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
@@ -93,9 +93,7 @@ public class ParserEventStream extends AbstractParserEventStream {
if (!type.equals(AbstractBottomUpParser.TOP_NODE)) {
reducedChunks = new Parse[chunks.length-(reduceEnd-reduceStart+1)+1]; //total - num_removed + 1 (for new node)
//insert nodes before reduction
- for (int ri = 0; ri< reduceStart; ri++) {
- reducedChunks[ri]=chunks[ri];
- }
+ System.arraycopy(chunks, 0, reducedChunks, 0, reduceStart);
//insert reduced node
reducedChunks[reduceStart]=parent;
//propagate punctuation sets
[3/7] opennlp git commit: OPENNLP-899: Replace deprecated code from
Tokenizer Trainer
Posted by jo...@apache.org.
OPENNLP-899: Replace deprecated code from Tokenizer Trainer
This closes #13
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/927ee0fc
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/927ee0fc
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/927ee0fc
Branch: refs/heads/897
Commit: 927ee0fc7f47c7c6ccbd72a184aaa6215d77943c
Parents: bbda5de
Author: smarthi <sm...@apache.org>
Authored: Wed Dec 21 14:35:49 2016 -0500
Committer: Kottmann <jo...@apache.org>
Committed: Thu Dec 22 16:25:32 2016 +0100
----------------------------------------------------------------------
.../tools/tokenize/TokenizerCrossValidator.java | 4 +---
.../opennlp/tools/tokenize/TokenizerFactory.java | 2 +-
.../java/opennlp/tools/tokenize/TokenizerME.java | 8 ++++----
.../opennlp/tools/tokenize/TokenizerModel.java | 17 ++++++-----------
.../opennlp/tools/eval/ArvoresDeitadasEval.java | 2 +-
.../opennlp/tools/tokenize/TokenizerTestUtil.java | 11 +++++------
.../opennlp/uima/tokenize/TokenizerTrainer.java | 6 +++++-
7 files changed, 23 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
index 811165c..3ca3c1d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
@@ -94,9 +94,7 @@ public class TokenizerCrossValidator {
partitioner.next();
// Maybe throws IOException if temporary file handling fails ...
- TokenizerModel model;
-
- model = TokenizerME.train(trainingSampleStream, this.factory, params);
+ TokenizerModel model = TokenizerME.train(trainingSampleStream, this.factory, params);
TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model), listeners);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
index 4c67ce1..f9e789a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
@@ -37,7 +37,7 @@ public class TokenizerFactory extends BaseToolFactory {
private String languageCode;
private Dictionary abbreviationDictionary;
- private Boolean useAlphaNumericOptimization = null;
+ private Boolean useAlphaNumericOptimization;
private Pattern alphaNumericPattern;
private static final String ABBREVIATIONS_ENTRY_NAME = "abbreviations.dictionary";
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 5412c28..4c4c638 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -52,7 +52,7 @@ import opennlp.tools.util.model.ModelUtil;
* must be instantiated which can share one <code>TokenizerModel</code> instance
* to safe memory.
* <p>
- * To train a new model {{@link #train(String, ObjectStream, boolean, TrainingParameters)} method
+ * To train a new model {{@link #train(ObjectStream, TokenizerFactory, TrainingParameters)} method
* can be used.
* <p>
* Sample usage:
@@ -250,8 +250,7 @@ public class TokenizerME extends AbstractTokenizer {
MaxentModel maxentModel = trainer.train(eventStream);
- return new TokenizerModel(maxentModel, manifestInfoEntries,
- factory);
+ return new TokenizerModel(maxentModel, manifestInfoEntries, factory);
}
/**
@@ -338,7 +337,8 @@ public class TokenizerME extends AbstractTokenizer {
*/
public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
boolean useAlphaNumericOptimization) throws IOException {
- return train(languageCode, samples, useAlphaNumericOptimization, ModelUtil.createDefaultTrainingParameters());
+ return train(samples, TokenizerFactory.create(null, languageCode, null, useAlphaNumericOptimization, null),
+ ModelUtil.createDefaultTrainingParameters());
}
/**
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
index 1af60f4..e63b946 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
@@ -124,7 +124,7 @@ public final class TokenizerModel extends BaseModel {
* @throws IOException if reading from the stream fails in anyway
* @throws InvalidFormatException if the stream doesn't have the expected format
*/
- public TokenizerModel(InputStream in) throws IOException, InvalidFormatException {
+ public TokenizerModel(InputStream in) throws IOException {
super(COMPONENT_NAME, in);
}
@@ -134,9 +134,8 @@ public final class TokenizerModel extends BaseModel {
* @param modelFile the file containing the tokenizer model
*
* @throws IOException if reading from the stream fails in anyway
- * @throws InvalidFormatException if the stream doesn't have the expected format
*/
- public TokenizerModel(File modelFile) throws IOException, InvalidFormatException {
+ public TokenizerModel(File modelFile) throws IOException {
super(COMPONENT_NAME, modelFile);
}
@@ -146,9 +145,8 @@ public final class TokenizerModel extends BaseModel {
* @param modelURL the URL pointing to the tokenizer model
*
* @throws IOException if reading from the stream fails in anyway
- * @throws InvalidFormatException if the stream doesn't have the expected format
*/
- public TokenizerModel(URL modelURL) throws IOException, InvalidFormatException {
+ public TokenizerModel(URL modelURL) throws IOException {
super(COMPONENT_NAME, modelURL);
}
@@ -196,10 +194,7 @@ public final class TokenizerModel extends BaseModel {
}
public boolean useAlphaNumericOptimization() {
- if (getFactory() != null) {
- return getFactory().isUseAlphaNumericOptmization();
- }
- return false;
+ return getFactory() != null && getFactory().isUseAlphaNumericOptmization();
}
public static void main(String[] args) throws IOException {
@@ -224,8 +219,8 @@ public final class TokenizerModel extends BaseModel {
AbstractModel model = new BinaryGISModelReader(new DataInputStream(
new FileInputStream(modelName))).getModel();
- TokenizerModel packageModel = new TokenizerModel(languageCode, model,
- alphaNumericOptimization);
+ TokenizerModel packageModel = new TokenizerModel(model, null,
+ TokenizerFactory.create(null, languageCode, null, alphaNumericOptimization, null));
OutputStream out = null;
try {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
index 35f0e00..33d6ffe 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
@@ -72,7 +72,7 @@ public class ArvoresDeitadasEval {
private static final String LANG = "pt";
- private static final TrainingParameters getPerceptronZeroCutoff() {
+ private static TrainingParameters getPerceptronZeroCutoff() {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
params.put(TrainingParameters.ALGORITHM_PARAM,
PerceptronTrainer.PERCEPTRON_VALUE);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
index f8eb85b..ffe5101 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
@@ -19,11 +19,10 @@
package opennlp.tools.tokenize;
import static java.nio.charset.StandardCharsets.UTF_8;
+
import java.io.IOException;
-import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
-
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.util.CollectionObjectStream;
import opennlp.tools.util.InputStreamFactory;
@@ -38,7 +37,7 @@ import opennlp.tools.util.TrainingParameters;
public class TokenizerTestUtil {
static TokenizerModel createSimpleMaxentTokenModel() throws IOException {
- List<TokenSample> samples = new ArrayList<TokenSample>();
+ List<TokenSample> samples = new ArrayList<>();
samples.add(new TokenSample("year", new Span[]{new Span(0, 4)}));
samples.add(new TokenSample("year,", new Span[]{
@@ -59,8 +58,8 @@ public class TokenizerTestUtil {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
- return TokenizerME.train("en", new CollectionObjectStream<TokenSample>(samples), true,
- mlParams);
+ return TokenizerME.train(new CollectionObjectStream<>(samples),
+ TokenizerFactory.create(null, "en", null, true, null), mlParams);
}
static TokenizerModel createMaxentTokenModel() throws IOException {
@@ -75,7 +74,7 @@ public class TokenizerTestUtil {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
- return TokenizerME.train("en", samples, true, mlParams);
+ return TokenizerME.train(samples, TokenizerFactory.create(null, "en", null, true, null), mlParams);
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/927ee0fc/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
index 66d1dfa..2b36051 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
@@ -32,6 +32,7 @@ import java.util.List;
import opennlp.tools.ml.maxent.GIS;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
+import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InputStreamFactory;
@@ -40,6 +41,7 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
+import opennlp.tools.util.model.ModelUtil;
import opennlp.uima.util.CasConsumerUtil;
import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
@@ -257,7 +259,9 @@ public final class TokenizerTrainer extends CasConsumer_ImplBase {
samples = new SampleTraceStream<>(samples, samplesOut);
}
- tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics);
+ tokenModel = TokenizerME.train(samples,
+ TokenizerFactory.create(null, language, null, isSkipAlphaNumerics, null),
+ ModelUtil.createDefaultTrainingParameters());
// dereference to allow garbage collection
tokenSamples = null;
[2/7] opennlp git commit: OPENNLP-871: Cleanup for Java 8
Posted by jo...@apache.org.
OPENNLP-871: Cleanup for Java 8
This closes #12
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bbda5de4
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bbda5de4
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bbda5de4
Branch: refs/heads/897
Commit: bbda5de4098e0ffb884384a2fbbccae8b181742b
Parents: dd2aacc
Author: smarthi <sm...@apache.org>
Authored: Wed Dec 21 12:58:26 2016 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Wed Dec 21 19:17:41 2016 +0100
----------------------------------------------------------------------
.../tools/tokenize/TokenizerCrossValidator.java | 3 +-
.../opennlp/tools/tokenize/TokenizerME.java | 22 +++++-------
.../opennlp/tools/util/ObjectStreamUtils.java | 2 ++
.../tools/tokenize/TokenizerFactoryTest.java | 25 ++++++--------
.../opennlp/uima/chunker/ChunkerTrainer.java | 12 +++----
.../doccat/AbstractDocumentCategorizer.java | 4 +--
.../uima/doccat/DocumentCategorizer.java | 3 +-
.../uima/doccat/DocumentCategorizerTrainer.java | 10 ++----
.../uima/namefind/AbstractNameFinder.java | 6 ++--
.../uima/namefind/NameFinderTrainer.java | 24 ++++----------
.../opennlp/uima/normalizer/Normalizer.java | 5 +--
.../uima/normalizer/StringDictionary.java | 6 ++--
.../main/java/opennlp/uima/parser/Parser.java | 4 +--
.../java/opennlp/uima/postag/POSTagger.java | 4 +--
.../opennlp/uima/postag/POSTaggerTrainer.java | 17 +++++-----
.../sentdetect/SentenceDetectorTrainer.java | 10 +++---
.../opennlp/uima/tokenize/TokenizerTrainer.java | 35 ++++++++++----------
.../opennlp/uima/util/ContainingConstraint.java | 11 ++----
.../java/opennlp/uima/util/OpennlpUtil.java | 4 +--
.../main/java/opennlp/uima/util/UimaUtil.java | 2 +-
20 files changed, 84 insertions(+), 125 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
index 7a85d6a..811165c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
@@ -86,8 +86,7 @@ public class TokenizerCrossValidator {
*/
public void evaluate(ObjectStream<TokenSample> samples, int nFolds) throws IOException {
- CrossValidationPartitioner<TokenSample> partitioner =
- new CrossValidationPartitioner<TokenSample>(samples, nFolds);
+ CrossValidationPartitioner<TokenSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds);
while (partitioner.hasNext()) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 3d03943..5412c28 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -18,7 +18,6 @@
package opennlp.tools.tokenize;
import java.io.IOException;
-import java.io.ObjectStreamException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -26,7 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
@@ -127,8 +125,8 @@ public class TokenizerME extends AbstractTokenizer {
this.model = model.getMaxentModel();
this.useAlphaNumericOptimization = factory.isUseAlphaNumericOptmization();
- newTokens = new ArrayList<Span>();
- tokProbs = new ArrayList<Double>(50);
+ newTokens = new ArrayList<>();
+ tokProbs = new ArrayList<>(50);
}
/**
@@ -145,13 +143,13 @@ public class TokenizerME extends AbstractTokenizer {
this.model = model.getMaxentModel();
useAlphaNumericOptimization = model.useAlphaNumericOptimization();
- newTokens = new ArrayList<Span>();
- tokProbs = new ArrayList<Double>(50);
+ newTokens = new ArrayList<>();
+ tokProbs = new ArrayList<>(50);
}
private static Set<String> getAbbreviations(Dictionary abbreviations) {
if(abbreviations == null) {
- return Collections.<String>emptySet();
+ return Collections.emptySet();
}
return abbreviations.asStringSet();
}
@@ -241,7 +239,7 @@ public class TokenizerME extends AbstractTokenizer {
public static TokenizerModel train(ObjectStream<TokenSample> samples, TokenizerFactory factory,
TrainingParameters mlParams) throws IOException {
- Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+ Map<String, String> manifestInfoEntries = new HashMap<>();
ObjectStream<Event> eventStream = new TokSpanEventStream(samples,
factory.isUseAlphaNumericOptmization(),
@@ -305,7 +303,7 @@ public class TokenizerME extends AbstractTokenizer {
throws IOException {
Factory factory = new Factory();
- Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+ Map<String, String> manifestInfoEntries = new HashMap<>();
ObjectStream<Event> eventStream = new TokSpanEventStream(samples,
useAlphaNumericOptimization, factory.getAlphanumeric(languageCode),
@@ -334,16 +332,12 @@ public class TokenizerME extends AbstractTokenizer {
* @throws IOException it throws an {@link IOException} if an {@link IOException}
* is thrown during IO operations on a temp file which is
*
- * @throws ObjectStreamException if reading from the {@link ObjectStream} fails
- * created during training.
- *
- *
* @deprecated Use
* {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
* and pass in a {@link TokenizerFactory}
*/
public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
- boolean useAlphaNumericOptimization) throws IOException, ObjectStreamException {
+ boolean useAlphaNumericOptimization) throws IOException {
return train(languageCode, samples, useAlphaNumericOptimization, ModelUtil.createDefaultTrainingParameters());
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java
index b9b1fe8..9ad98dd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java
@@ -31,6 +31,7 @@ public class ObjectStreamUtils {
*
* @return the object stream over the array elements
*/
+ @SafeVarargs
public static <T> ObjectStream<T> createObjectStream(final T... array) {
return new ObjectStream<T>() {
@@ -90,6 +91,7 @@ public class ObjectStreamUtils {
* @param streams
* @return
*/
+ @SafeVarargs
public static <T> ObjectStream<T> createObjectStream(final ObjectStream<T>... streams) {
for (ObjectStream<T> stream : streams) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
index 96d19a6..2fb3d40 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
@@ -18,19 +18,12 @@
package opennlp.tools.tokenize;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Pattern;
-
-import org.junit.Test;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.tokenize.DummyTokenizerFactory.DummyContextGenerator;
@@ -40,6 +33,12 @@ import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
/**
* Tests for the {@link TokenizerFactory} class.
@@ -56,11 +55,10 @@ public class TokenizerFactoryTest {
private static TokenizerModel train(TokenizerFactory factory)
throws IOException {
- return TokenizerME.train(createSampleStream(),
- factory, TrainingParameters.defaultParams());
+ return TokenizerME.train(createSampleStream(), factory, TrainingParameters.defaultParams());
}
- static Dictionary loadAbbDictionary() throws IOException {
+ private static Dictionary loadAbbDictionary() throws IOException {
InputStream in = TokenizerFactoryTest.class.getClassLoader()
.getResourceAsStream("opennlp/tools/sentdetect/abb.xml");
@@ -76,7 +74,7 @@ public class TokenizerFactoryTest {
TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));
TokenizerFactory factory = model.getFactory();
- assertTrue(factory.getAbbreviationDictionary() instanceof Dictionary);
+ assertTrue(factory.getAbbreviationDictionary() != null);
assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern()
@@ -92,7 +90,7 @@ public class TokenizerFactoryTest {
TokenizerModel fromSerialized = new TokenizerModel(in);
factory = fromSerialized.getFactory();
- assertTrue(factory.getAbbreviationDictionary() instanceof Dictionary);
+ assertTrue(factory.getAbbreviationDictionary() != null);
assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern()
@@ -130,8 +128,7 @@ public class TokenizerFactoryTest {
assertNull(factory.getAbbreviationDictionary());
assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
- assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern()
- .pattern());
+ assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern());
assertEquals(lang, factory.getLanguageCode());
assertEquals(lang, model.getLanguage());
assertFalse(factory.isUseAlphaNumericOptmization());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
index d22879d..8c6232b 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
@@ -66,7 +66,7 @@ import org.apache.uima.util.ProcessTrace;
*/
public class ChunkerTrainer extends CasConsumer_ImplBase {
- private List<ChunkSample> mChunkSamples = new ArrayList<ChunkSample>();
+ private List<ChunkSample> mChunkSamples = new ArrayList<>();
private UimaContext mContext;
@@ -82,8 +82,6 @@ public class ChunkerTrainer extends CasConsumer_ImplBase {
private Feature mChunkTagFeature;
- private Logger mLogger;
-
private String language;
/**
@@ -95,7 +93,7 @@ public class ChunkerTrainer extends CasConsumer_ImplBase {
mContext = getUimaContext();
- mLogger = mContext.getLogger();
+ Logger mLogger = mContext.getLogger();
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker Trainer.");
@@ -183,9 +181,9 @@ public class ChunkerTrainer extends CasConsumer_ImplBase {
Iterator<AnnotationFS> tokenIterator = tcas.createFilteredIterator(tokenIndex.iterator(),
containingConstraint);
- List<String> tokens = new ArrayList<String>();
- List<String> tags = new ArrayList<String>();;
- List<String> chunkTags = new ArrayList<String>();;
+ List<String> tokens = new ArrayList<>();
+ List<String> tags = new ArrayList<>();
+ List<String> chunkTags = new ArrayList<>();
while (tokenIterator.hasNext()) {
AnnotationFS tokenAnnotation = tokenIterator.next();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index 5abfd76..d2591d5 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -41,8 +41,6 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
private UimaContext context;
- private Logger mLogger;
-
private opennlp.tools.doccat.DocumentCategorizer mCategorizer;
private Type mTokenType;
@@ -54,7 +52,7 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
this.context = context;
- mLogger = context.getLogger();
+ Logger mLogger = context.getLogger();
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
index 5cf337f..cb71a50 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
@@ -56,8 +56,7 @@ public class DocumentCategorizer extends AbstractDocumentCategorizer {
protected void setBestCategory(CAS tcas, String bestCategory) {
FSIndex<AnnotationFS> categoryIndex = tcas.getAnnotationIndex(mCategoryType);
- AnnotationFS categoryAnnotation = categoryIndex.size() > 0 ?
- categoryIndex.iterator().next() : null;
+ AnnotationFS categoryAnnotation;
if (categoryIndex.size() > 0) {
categoryAnnotation = categoryIndex.iterator().next();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
index 31c6392..5b35a82 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
@@ -56,13 +56,9 @@ public class DocumentCategorizerTrainer extends CasConsumer_ImplBase {
private UimaContext mContext;
- private Logger mLogger;
-
private String mModelName;
- private List<DocumentSample> documentSamples = new ArrayList<DocumentSample>();
-
- private Type mTokenType;
+ private List<DocumentSample> documentSamples = new ArrayList<>();
private Type mCategoryType;
@@ -76,7 +72,7 @@ public class DocumentCategorizerTrainer extends CasConsumer_ImplBase {
mContext = getUimaContext();
- mLogger = mContext.getLogger();
+ Logger mLogger = mContext.getLogger();
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Doccat Trainer.");
@@ -95,7 +91,7 @@ public class DocumentCategorizerTrainer extends CasConsumer_ImplBase {
String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.SENTENCE_TYPE_PARAMETER);
- mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
+ Type mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
String categoryTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
"opennlp.uima.doccat.CategoryType");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
index c0bad5d..dbc604c 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
@@ -123,7 +123,7 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
final AnnotationComboIterator sentenceNameCombo = new AnnotationComboIterator(cas,
mSentenceType, mNameType);
- List<AnnotationFS> removeAnnotations = new LinkedList<AnnotationFS>();
+ List<AnnotationFS> removeAnnotations = new LinkedList<>();
for (AnnotationIteratorPair annotationIteratorPair : sentenceNameCombo) {
for (AnnotationFS nameAnnotation : annotationIteratorPair.getSubIterator()) {
removeAnnotations.add(nameAnnotation);
@@ -140,9 +140,9 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
for (AnnotationIteratorPair annotationIteratorPair : sentenceTokenCombo) {
- final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();
+ final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<>();
- final List<String> sentenceTokenList = new LinkedList<String>();
+ final List<String> sentenceTokenList = new LinkedList<>();
for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
index dcc0ddc..6207580 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
@@ -126,7 +126,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
// - Directly start indexing with a blocking sample stream, the indexer will then write everything
// to disk or could store the events much more space efficient in memory
- private List<NameSample> nameFinderSamples = new ArrayList<NameSample>();
+ private List<NameSample> nameFinderSamples = new ArrayList<>();
private TrainingParameters trainingParams;
/**
@@ -220,7 +220,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
* @return
*/
private static <T> List<T> iteratorToList(Iterator<T> it) {
- List<T> list = new LinkedList<T>();
+ List<T> list = new LinkedList<>();
while (it.hasNext()) {
list.add(it.next());
@@ -231,19 +231,9 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
private static boolean isContaining(AnnotationFS annotation,
AnnotationFS containtedAnnotation) {
- boolean isStartContaining = annotation.getBegin() <= containtedAnnotation
- .getBegin();
- if (!isStartContaining) {
- return false;
- }
-
- boolean isEndContaining = annotation.getEnd() >= containtedAnnotation
- .getEnd();
- if (!isEndContaining) {
- return false;
- }
+ boolean isStartContaining = annotation.getBegin() <= containtedAnnotation.getBegin();
+ return isStartContaining && annotation.getEnd() >= containtedAnnotation.getEnd();
- return true;
}
/**
@@ -258,7 +248,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
*/
private static Span[] createNames(List<AnnotationFS> tokenList, List<AnnotationFS> entityAnnotations) {
- List<Span> nameList = new LinkedList<Span>();
+ List<Span> nameList = new LinkedList<>();
AnnotationFS currentEntity = null;
@@ -299,7 +289,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
return nameList.toArray(new Span[nameList.size()]);
}
- /**
+ /*
* Process the given CAS object.
*/
/**
@@ -392,7 +382,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase {
if (sampleTraceFile != null) {
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
- samples = new SampleTraceStream<NameSample>(samples, samplesOut);
+ samples = new SampleTraceStream<>(samples, samplesOut);
}
Map<String, Object> resourceMap;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
index 39150ac..2118a0a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
@@ -65,7 +65,7 @@ public class Normalizer extends CasAnnotator_ImplBase {
private static final Set<String> SUPPORTED_TYPES;
static {
- Set<String> supportedTypes = new HashSet<String>();
+ Set<String> supportedTypes = new HashSet<>();
supportedTypes.add(CAS.TYPE_NAME_STRING);
supportedTypes.add(CAS.TYPE_NAME_BYTE);
@@ -232,9 +232,6 @@ public class Normalizer extends CasAnnotator_ImplBase {
.getName())) {
nameAnnotation
.setDoubleValue(mStructureFeature, number.doubleValue());
- } else {
- // assert false : mStructureFeature.getRange().getName()
- // + " is not supported!";
}
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
index e55f4ae..5631544 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
@@ -34,7 +34,7 @@ import opennlp.tools.util.StringList;
// lookup a string for given token list
public class StringDictionary {
- private Map<StringList, String> entries = new HashMap<StringList, String>();
+ private Map<StringList, String> entries = new HashMap<>();
public StringDictionary() {
}
@@ -44,10 +44,8 @@ public class StringDictionary {
*
* @param in
* @throws IOException
- * @throws InvalidFormatException
*/
- public StringDictionary(InputStream in) throws IOException,
- InvalidFormatException {
+ public StringDictionary(InputStream in) throws IOException {
DictionarySerializer.create(in, new EntryInserter() {
public void insert(Entry entry) throws InvalidFormatException {
String valueString = entry.getAttributes().getValue("value");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
index a3b3f00..6e7ecda 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
@@ -70,7 +70,7 @@ import org.apache.uima.util.Logger;
public class Parser extends CasAnnotator_ImplBase {
private static class ParseConverter {
- private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
+ private Map<Integer, Integer> mIndexMap = new HashMap<>();
private Parse mParseForTagger;
@@ -271,7 +271,7 @@ public class Parser extends CasAnnotator_ImplBase {
Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
allTokens.iterator(), containingConstraint);
- List<Span> tokenSpans = new LinkedList<Span>();
+ List<Span> tokenSpans = new LinkedList<>();
while(containingTokens.hasNext()) {
AnnotationFS token = containingTokens.next();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
index 2fdc47c..358e82c 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
@@ -164,9 +164,9 @@ public final class POSTagger extends CasAnnotator_ImplBase {
for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
- final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();
+ final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<>();
- final List<String> sentenceTokenList = new LinkedList<String>();
+ final List<String> sentenceTokenList = new LinkedList<>();
for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
index 23158b9..16e1605 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
@@ -17,26 +17,25 @@
package opennlp.uima.postag;
-import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-
import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.postag.*;
+import opennlp.tools.postag.POSDictionary;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelType;
import opennlp.uima.util.AnnotatorUtil;
import opennlp.uima.util.CasConsumerUtil;
import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
import opennlp.uima.util.UimaUtil;
-
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIndex;
@@ -82,7 +81,7 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase {
private Logger mLogger;
- private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
+ private List<POSSample> mPOSSamples = new ArrayList<>();
private String language;
@@ -173,8 +172,8 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase {
ContainingConstraint containingConstraint =
new ContainingConstraint(sentence);
- List<String> tokens = new ArrayList<String>();
- List<String> tags = new ArrayList<String>();
+ List<String> tokens = new ArrayList<>();
+ List<String> tags = new ArrayList<>();
Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
allTokens.iterator(), containingConstraint);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
index 8fa22d7..2c110bd 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
@@ -67,7 +67,7 @@ import org.apache.uima.util.ProcessTrace;
*/
public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
- private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
+ private List<SentenceSample> sentenceSamples = new ArrayList<>();
private Type mSentenceType;
@@ -75,8 +75,6 @@ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
private String language = "en";
- private Logger mLogger;
-
private UimaContext mContext;
private String eosChars;
@@ -94,7 +92,7 @@ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
mContext = getUimaContext();
- mLogger = mContext.getLogger();
+ Logger mLogger = mContext.getLogger();
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
@@ -172,11 +170,11 @@ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters();
ObjectStream<SentenceSample> samples = ObjectStreamUtils.createObjectStream(sentenceSamples);
- Writer samplesOut = null;
+ Writer samplesOut;
if (sampleTraceFile != null) {
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
- samples = new SampleTraceStream<SentenceSample>(samples, samplesOut);
+ samples = new SampleTraceStream<>(samples, samplesOut);
}
SentenceModel sentenceModel = SentenceDetectorME.train(language, samples,
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
index ece9eca..66d1dfa 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
@@ -29,19 +29,6 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
import opennlp.tools.ml.maxent.GIS;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
@@ -58,6 +45,18 @@ import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
import opennlp.uima.util.SampleTraceStream;
import opennlp.uima.util.UimaUtil;
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
/**
* OpenNLP Tokenizer trainer.
@@ -80,10 +79,10 @@ import opennlp.uima.util.UimaUtil;
*/
public final class TokenizerTrainer extends CasConsumer_ImplBase {
- public static final String IS_ALPHA_NUMERIC_OPTIMIZATION =
+ private static final String IS_ALPHA_NUMERIC_OPTIMIZATION =
"opennlp.uima.tokenizer.IsAlphaNumericOptimization";
- private List<TokenSample> tokenSamples = new ArrayList<TokenSample>();
+ private List<TokenSample> tokenSamples = new ArrayList<>();
private UimaContext mContext;
@@ -194,7 +193,7 @@ public final class TokenizerTrainer extends CasConsumer_ImplBase {
Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
allTokens.iterator(), containingConstraint);
- List<Span> openNLPSpans = new LinkedList<Span>();
+ List<Span> openNLPSpans = new LinkedList<>();
while (containingTokens.hasNext()) {
AnnotationFS tokenAnnotation = containingTokens.next();
@@ -231,7 +230,7 @@ public final class TokenizerTrainer extends CasConsumer_ImplBase {
// if trace file
// serialize events ...
- Writer samplesOut = null;
+ Writer samplesOut;
TokenizerModel tokenModel;
if (additionalTrainingDataFile != null) {
@@ -255,7 +254,7 @@ public final class TokenizerTrainer extends CasConsumer_ImplBase {
if (sampleTraceFile != null) {
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
- samples = new SampleTraceStream<TokenSample>(samples, samplesOut);
+ samples = new SampleTraceStream<>(samples, samplesOut);
}
tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
index 20c00ed..57ae612 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
@@ -30,8 +30,7 @@ import org.apache.uima.cas.text.AnnotationFS;
public final class ContainingConstraint implements FSMatchConstraint {
private static final long serialVersionUID = 1;
- private Collection<AnnotationFS> mContainingAnnotations =
- new LinkedList<AnnotationFS>();
+ private Collection<AnnotationFS> mContainingAnnotations = new LinkedList<>();
/**
* Initializes a new instance.
@@ -69,12 +68,8 @@ public final class ContainingConstraint implements FSMatchConstraint {
}
private boolean isContaining(AnnotationFS annotation, AnnotationFS containing) {
- if ((containing.getBegin() <= annotation.getBegin())
- && (containing.getEnd() >= annotation.getEnd())) {
- return true;
- } else {
- return false;
- }
+ return (containing.getBegin() <= annotation.getBegin())
+ && (containing.getEnd() >= annotation.getEnd());
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
index 59e8566..23d0b3a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
@@ -56,7 +56,7 @@ final public class OpennlpUtil {
}
}
- public static final byte[] loadBytes(File inFile) throws IOException {
+ public static byte[] loadBytes(File inFile) throws IOException {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
try (InputStream in = new FileInputStream(inFile)) {
@@ -71,7 +71,7 @@ final public class OpennlpUtil {
return bytes.toByteArray();
}
- public static final TrainingParameters loadTrainingParams(String inFileValue,
+ public static TrainingParameters loadTrainingParams(String inFileValue,
boolean isSequenceTrainingAllowed) throws ResourceInitializationException {
TrainingParameters params;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbda5de4/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
index 610cdfd..5d4efc6 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
@@ -102,7 +102,7 @@ public final class UimaUtil {
Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
allRemoveAnnotations.iterator(), containingConstraint);
- Collection<AnnotationFS> removeAnnotations = new LinkedList<AnnotationFS>();
+ Collection<AnnotationFS> removeAnnotations = new LinkedList<>();
while (containingTokens.hasNext()) {
removeAnnotations.add(containingTokens.next());
[5/7] opennlp git commit: Define update and clearAdaptiveData as
default methods
Posted by jo...@apache.org.
Define update and clearAdaptiveData as default methods
See issue OPENNLP-856
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/99323ad6
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/99323ad6
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/99323ad6
Branch: refs/heads/897
Commit: 99323ad68c5eb3f9bae4314984931acca3699521
Parents: 71e40d0
Author: Kottmann <jo...@apache.org>
Authored: Thu Dec 22 17:31:54 2016 +0100
Committer: Kottmann <jo...@apache.org>
Committed: Thu Dec 22 17:35:52 2016 +0100
----------------------------------------------------------------------
.../opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java | 4 ++--
.../tools/util/featuregen/AdditionalContextFeatureGenerator.java | 2 +-
.../tools/util/featuregen/BigramNameFeatureGenerator.java | 2 +-
.../tools/util/featuregen/BrownBigramFeatureGenerator.java | 2 +-
.../tools/util/featuregen/BrownTokenClassFeatureGenerator.java | 2 +-
.../tools/util/featuregen/BrownTokenFeatureGenerator.java | 2 +-
.../tools/util/featuregen/CharacterNgramFeatureGenerator.java | 2 +-
.../tools/util/featuregen/DictionaryFeatureGenerator.java | 2 +-
.../tools/util/featuregen/DocumentBeginFeatureGenerator.java | 2 +-
.../tools/util/featuregen/FastTokenClassFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java | 3 +++
.../main/java/opennlp/tools/util/featuregen/InSpanGenerator.java | 2 +-
.../tools/util/featuregen/OutcomePriorFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/PrefixFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/SentenceFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/SuffixFeatureGenerator.java | 2 +-
.../tools/util/featuregen/TokenClassFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/TokenFeatureGenerator.java | 2 +-
.../tools/util/featuregen/TokenPatternFeatureGenerator.java | 2 +-
.../tools/util/featuregen/TrigramNameFeatureGenerator.java | 2 +-
.../tools/util/featuregen/WordClusterFeatureGenerator.java | 2 +-
.../opennlp/tools/util/featuregen/IdentityFeatureGenerator.java | 2 +-
22 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java
index 6969504..ce8e9e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java
@@ -58,11 +58,11 @@ public interface AdaptiveFeatureGenerator {
* @param tokens The tokens of the sentence or other text unit which has been processed.
* @param outcomes The outcomes associated with the specified tokens.
*/
- void updateAdaptiveData(String[] tokens, String[] outcomes);
+ default void updateAdaptiveData(String[] tokens, String[] outcomes) {};
/**
* Informs the feature generator that the context of the adaptive data (typically a document)
* is no longer valid.
*/
- void clearAdaptiveData();
+ default void clearAdaptiveData() {};
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
index 233c31d..3606bfc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
@@ -24,7 +24,7 @@ import java.util.List;
* The {@link AdditionalContextFeatureGenerator} generates the context from the passed
* in additional context.
*/
-public class AdditionalContextFeatureGenerator extends FeatureGeneratorAdapter {
+public class AdditionalContextFeatureGenerator implements AdaptiveFeatureGenerator {
private String[][] additionalContext;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
index a3d16d8..779628b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
@@ -19,7 +19,7 @@ package opennlp.tools.util.featuregen;
import java.util.List;
-public class BigramNameFeatureGenerator extends FeatureGeneratorAdapter {
+public class BigramNameFeatureGenerator implements AdaptiveFeatureGenerator {
public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) {
String wc = FeatureGeneratorUtil.tokenFeature(tokens[index]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
index 9329757..a932e74 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
* Generates Brown cluster features for token bigrams.
*/
-public class BrownBigramFeatureGenerator extends FeatureGeneratorAdapter {
+public class BrownBigramFeatureGenerator implements AdaptiveFeatureGenerator {
private BrownCluster brownLexicon;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
index 8f53be9..db3d774 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
* Generates Brown cluster features for current token and token class.
*/
-public class BrownTokenClassFeatureGenerator extends FeatureGeneratorAdapter {
+public class BrownTokenClassFeatureGenerator implements AdaptiveFeatureGenerator {
private BrownCluster brownLexicon;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java
index 5689897..f41203f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
* Generates Brown cluster features for current token.
*/
-public class BrownTokenFeatureGenerator extends FeatureGeneratorAdapter {
+public class BrownTokenFeatureGenerator implements AdaptiveFeatureGenerator {
private BrownCluster brownLexicon;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
index 6314dac..8c0edd9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
@@ -28,7 +28,7 @@ import opennlp.tools.util.StringUtil;
* generate features about each token.
* The minimum and maximum length can be specified.
*/
-public class CharacterNgramFeatureGenerator extends FeatureGeneratorAdapter {
+public class CharacterNgramFeatureGenerator implements AdaptiveFeatureGenerator {
private final int minLength;
private final int maxLength;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java
index bbedcc2..68037ed 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java
@@ -31,7 +31,7 @@ import opennlp.tools.namefind.DictionaryNameFinder;
* @see DictionaryNameFinder
* @see InSpanGenerator
*/
-public class DictionaryFeatureGenerator extends FeatureGeneratorAdapter {
+public class DictionaryFeatureGenerator implements AdaptiveFeatureGenerator {
private InSpanGenerator isg;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
index b201e9a..efcfce4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
@@ -19,7 +19,7 @@ package opennlp.tools.util.featuregen;
import java.util.List;
-public class DocumentBeginFeatureGenerator extends FeatureGeneratorAdapter {
+public class DocumentBeginFeatureGenerator implements AdaptiveFeatureGenerator {
private String firstSentence[];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
index 980d2ad..5c82733 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
@@ -30,7 +30,7 @@ import opennlp.tools.util.StringUtil;
* @deprecated Use {@link TokenClassFeatureGenerator} instead!
*/
@Deprecated
-public class FastTokenClassFeatureGenerator extends FeatureGeneratorAdapter {
+public class FastTokenClassFeatureGenerator implements AdaptiveFeatureGenerator {
private static final String TOKEN_CLASS_PREFIX = "wc";
private static final String TOKEN_AND_CLASS_PREFIX = "w&c";
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java
index f2947cf..8f444ce 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorAdapter.java
@@ -22,7 +22,10 @@ package opennlp.tools.util.featuregen;
* This class provides empty implementations of some of the optional methods in
* {@link AdditionalContextFeatureGenerator} to make implementing feature generators
* easier.
+ *
+ * @deprecated use AdaptiveFeatureGenerator instead
*/
+@Deprecated // in 1.7.0
public abstract class FeatureGeneratorAdapter implements AdaptiveFeatureGenerator {
public void updateAdaptiveData(String[] tokens, String[] outcomes) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
index acd81ba..bcffc7e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
@@ -27,7 +27,7 @@ import opennlp.tools.util.Span;
* Generates features if the tokens are recognized by the provided
* {@link TokenNameFinder}.
*/
-public class InSpanGenerator extends FeatureGeneratorAdapter {
+public class InSpanGenerator implements AdaptiveFeatureGenerator {
private final String prefix;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/OutcomePriorFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/OutcomePriorFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/OutcomePriorFeatureGenerator.java
index 9593e8d..c2281f5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/OutcomePriorFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/OutcomePriorFeatureGenerator.java
@@ -23,7 +23,7 @@ import java.util.List;
/**
* The definition feature maps the underlying distribution of outcomes.
*/
-public class OutcomePriorFeatureGenerator extends FeatureGeneratorAdapter {
+public class OutcomePriorFeatureGenerator implements AdaptiveFeatureGenerator {
public static final String OUTCOME_PRIOR_FEATURE = "def";
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 10b9c55..8d433b6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -19,7 +19,7 @@ package opennlp.tools.util.featuregen;
import java.util.List;
-public class PrefixFeatureGenerator extends FeatureGeneratorAdapter {
+public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
private static final int PREFIX_LENGTH = 4;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGenerator.java
index 66b10d4..f7b81dc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGenerator.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
* This feature generator creates sentence begin and end features.
*/
-public class SentenceFeatureGenerator extends FeatureGeneratorAdapter {
+public class SentenceFeatureGenerator implements AdaptiveFeatureGenerator {
private final boolean isGenerateFirstWordFeature;
private final boolean isGenerateLastWordFeature;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
index 5ec34aa..815cc9e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
@@ -19,7 +19,7 @@ package opennlp.tools.util.featuregen;
import java.util.List;
-public class SuffixFeatureGenerator extends FeatureGeneratorAdapter {
+public class SuffixFeatureGenerator implements AdaptiveFeatureGenerator {
private static final int SUFFIX_LENGTH = 4;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
index 4aad40c..7358d24 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
@@ -25,7 +25,7 @@ import opennlp.tools.util.StringUtil;
/**
* Generates features for different for the class of the token.
*/
-public class TokenClassFeatureGenerator extends FeatureGeneratorAdapter {
+public class TokenClassFeatureGenerator implements AdaptiveFeatureGenerator {
private static final String TOKEN_CLASS_PREFIX = "wc";
private static final String TOKEN_AND_CLASS_PREFIX = "w&c";
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
index b0c9c5a..5aee49b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
@@ -25,7 +25,7 @@ import opennlp.tools.util.StringUtil;
/**
* Generates a feature which contains the token itself.
*/
-public class TokenFeatureGenerator extends FeatureGeneratorAdapter {
+public class TokenFeatureGenerator implements AdaptiveFeatureGenerator {
private static final String WORD_PREFIX = "w";
private boolean lowercase;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
index 48a855f..79e61b2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
@@ -29,7 +29,7 @@ import opennlp.tools.util.StringUtil;
* Partitions tokens into sub-tokens based on character classes and generates
* class features for each of the sub-tokens and combinations of those sub-tokens.
*/
-public class TokenPatternFeatureGenerator extends FeatureGeneratorAdapter {
+public class TokenPatternFeatureGenerator implements AdaptiveFeatureGenerator {
private Pattern noLetters = Pattern.compile("[^a-zA-Z]");
private Tokenizer tokenizer;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java
index 0fa61e0..7724eb5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java
@@ -23,7 +23,7 @@ import java.util.List;
* Adds trigram features based on tokens and token classes.
*
*/
-public class TrigramNameFeatureGenerator extends FeatureGeneratorAdapter {
+public class TrigramNameFeatureGenerator implements AdaptiveFeatureGenerator {
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
index 4bd0a6d..11e0e6b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
@@ -21,7 +21,7 @@ import java.util.List;
import opennlp.tools.util.StringUtil;
-public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
+public class WordClusterFeatureGenerator implements AdaptiveFeatureGenerator {
private WordClusterDictionary tokenDictionary;
private String resourceName;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/99323ad6/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
index 70cbd15..970fe63 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
@@ -19,7 +19,7 @@ package opennlp.tools.util.featuregen;
import java.util.List;
-class IdentityFeatureGenerator extends FeatureGeneratorAdapter {
+class IdentityFeatureGenerator implements AdaptiveFeatureGenerator {
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
[4/7] opennlp git commit: OPENNLP-900: LemmatizerME and
DictionaryLemmatizer share data format
Posted by jo...@apache.org.
OPENNLP-900: LemmatizerME and DictionaryLemmatizer share data format
This closes #16
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/71e40d0b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/71e40d0b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/71e40d0b
Branch: refs/heads/897
Commit: 71e40d0b30fbef976da3595fef8736a4e1245466
Parents: 927ee0f
Author: Daniel Russ <dr...@mail.nih.gov>
Authored: Thu Dec 22 10:55:04 2016 -0500
Committer: Kottmann <jo...@apache.org>
Committed: Thu Dec 22 17:22:34 2016 +0100
----------------------------------------------------------------------
.../main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/71e40d0b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index dde2d08..683b97c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -54,7 +54,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
try {
while ((line = breader.readLine()) != null) {
final String[] elems = line.split("\t");
- this.dictMap.put(Arrays.asList(elems[0], elems[2]), elems[1]);
+ this.dictMap.put(Arrays.asList(elems[0], elems[1]), elems[2]);
}
} catch (final IOException e) {
e.printStackTrace();
@@ -84,7 +84,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
keys.addAll(Arrays.asList(word.toLowerCase(), postag));
return keys;
}
-
+
public String[] lemmatize(final String[] tokens, final String[] postags) {
List<String> lemmas = new ArrayList<>();
for (int i = 0; i < tokens.length; i++) {
[7/7] opennlp git commit: Update Jersey to 2.25 and use Grizzly HTTP
Server
Posted by jo...@apache.org.
Update Jersey to 2.25 and use Grizzly HTTP Server
See issue OPENNLP-897
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6d1f8244
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6d1f8244
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6d1f8244
Branch: refs/heads/897
Commit: 6d1f82448ff499193dbc4b738c88b0e545a02218
Parents: c49a87a
Author: Joern Kottmann <jo...@apache.org>
Authored: Wed Dec 21 14:55:24 2016 +0100
Committer: Kottmann <jo...@apache.org>
Committed: Fri Dec 23 18:45:56 2016 +0100
----------------------------------------------------------------------
opennlp-brat-annotator/pom.xml | 24 ++++---------
.../opennlp/bratann/NameFinderAnnService.java | 36 +++++---------------
2 files changed, 15 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/6d1f8244/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 65e3b69..1007f61 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -33,27 +33,15 @@
<dependencies>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>9.2.3.v20140905</version>
+ <groupId>org.glassfish.jersey.containers</groupId>
+ <artifactId>jersey-container-grizzly2-http</artifactId>
+ <version>2.25</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>9.2.3.v20140905</version>
- </dependency>
-
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-bundle</artifactId>
- <version>1.18.1</version>
- </dependency>
-
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- <version>1.18.1</version>
+ <groupId>org.glassfish.jersey.media</groupId>
+ <artifactId>jersey-media-json-jackson</artifactId>
+ <version>2.25</version>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/opennlp/blob/6d1f8244/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
index 60d2a1b..b2dbce4 100644
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
@@ -18,13 +18,11 @@
package opennlp.bratann;
import java.io.File;
+import java.net.URI;
import java.util.Arrays;
import java.util.List;
-import org.eclipse.jetty.server.Server;
-import org.eclipse.jetty.servlet.ServletContextHandler;
-import org.eclipse.jetty.servlet.ServletHolder;
-
+import com.sun.net.httpserver.HttpServer;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.namefind.TokenNameFinderModel;
@@ -37,6 +35,10 @@ import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
+import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory;
+import org.glassfish.jersey.server.ResourceConfig;
+
+import javax.ws.rs.core.UriBuilder;
public class NameFinderAnnService {
@@ -92,28 +94,8 @@ public class NameFinderAnnService {
nameFinders = new TokenNameFinder[] { new NameFinderME(
new TokenNameFinderModel(new File(args[args.length - 1]))) };
- ServletContextHandler context = new ServletContextHandler(
- ServletContextHandler.SESSIONS);
- context.setContextPath("/");
-
- Server jettyServer = new Server(serverPort);
- jettyServer.setHandler(context);
-
- ServletHolder jerseyServlet = context
- .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
- jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages",
- "opennlp.bratann");
- jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
- jerseyServlet.setInitOrder(0);
-
- jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
- NameFinderResource.class.getCanonicalName());
-
- try {
- jettyServer.start();
- jettyServer.join();
- } finally {
- jettyServer.destroy();
- }
+ URI baseUri = UriBuilder.fromUri("http://localhost/").port(serverPort).build();
+ ResourceConfig config = new ResourceConfig(NameFinderResource.class);
+ GrizzlyHttpServerFactory.createHttpServer(baseUri, config);
}
}
[6/7] opennlp git commit: OPENNLP-871: Cleanup for Java 8 and remove
deprecated tokenizer code
Posted by jo...@apache.org.
OPENNLP-871: Cleanup for Java 8 and remove deprecated tokenizer code
Closes #17
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/c49a87ab
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/c49a87ab
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/c49a87ab
Branch: refs/heads/897
Commit: c49a87abdb6c07a123a2234f4ec5ca3d21306d69
Parents: 99323ad
Author: smarthi <sm...@apache.org>
Authored: Thu Dec 22 22:39:33 2016 -0500
Committer: Kottmann <jo...@apache.org>
Committed: Fri Dec 23 16:46:42 2016 +0100
----------------------------------------------------------------------
.../tools/cmdline/chunker/ChunkerMETool.java | 6 +-
.../opennlp/tools/dictionary/Dictionary.java | 7 +-
.../tools/tokenize/TokenizerCrossValidator.java | 36 --------
.../tools/tokenize/TokenizerFactory.java | 22 ++---
.../opennlp/tools/tokenize/TokenizerME.java | 89 --------------------
.../opennlp/tools/tokenize/TokenizerModel.java | 53 ------------
.../opennlp/tools/eval/Conll00ChunkerEval.java | 3 +-
7 files changed, 13 insertions(+), 203 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
index 5b87c9e..b511a0b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
@@ -69,11 +69,9 @@ public class ChunkerMETool extends BasicCmdLineTool {
continue;
}
- String[] chunks = chunker.chunk(posSample.getSentence(),
- posSample.getTags());
+ String[] chunks = chunker.chunk(posSample.getSentence(), posSample.getTags());
- System.out.println(new ChunkSample(posSample.getSentence(),
- posSample.getTags(), chunks).nicePrint());
+ System.out.println(new ChunkSample(posSample.getSentence(), posSample.getTags(), chunks).nicePrint());
perfMon.incrementCounter();
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index 58b7a6e..4961741 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -31,7 +31,6 @@ import java.util.StringTokenizer;
import opennlp.tools.dictionary.serializer.Attributes;
import opennlp.tools.dictionary.serializer.DictionarySerializer;
import opennlp.tools.dictionary.serializer.Entry;
-import opennlp.tools.dictionary.serializer.EntryInserter;
import opennlp.tools.util.StringList;
import opennlp.tools.util.StringUtil;
@@ -113,11 +112,7 @@ public class Dictionary implements Iterable<StringList> {
* @throws IOException
*/
public Dictionary(InputStream in) throws IOException {
- isCaseSensitive = DictionarySerializer.create(in, new EntryInserter() {
- public void insert(Entry entry) {
- put(entry.getTokens());
- }
- });
+ isCaseSensitive = DictionarySerializer.create(in, entry -> put(entry.getTokens()));
}
/**
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
index 3ca3c1d..fe9e4c6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
@@ -18,13 +18,10 @@
package opennlp.tools.tokenize;
import java.io.IOException;
-
-import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.CrossValidationPartitioner;
import opennlp.tools.util.eval.FMeasure;
-import opennlp.tools.util.model.ModelUtil;
public class TokenizerCrossValidator {
@@ -42,39 +39,6 @@ public class TokenizerCrossValidator {
}
/**
- * @deprecated use
- * {@link #TokenizerCrossValidator(TrainingParameters, TokenizerFactory, TokenizerEvaluationMonitor...)}
- * instead and pass in a {@link TokenizerFactory}
- */
- public TokenizerCrossValidator(String language, Dictionary abbreviations,
- boolean alphaNumericOptimization, TrainingParameters params,
- TokenizerEvaluationMonitor ... listeners) {
- this(params, new TokenizerFactory(language, abbreviations,
- alphaNumericOptimization, null), listeners);
- }
-
- /**
- * @deprecated use
- * {@link #TokenizerCrossValidator(TrainingParameters, TokenizerFactory, TokenizerEvaluationMonitor...)}
- * instead and pass in a {@link TokenizerFactory}
- */
- public TokenizerCrossValidator(String language, boolean alphaNumericOptimization) {
- this(language, alphaNumericOptimization, ModelUtil.createDefaultTrainingParameters());
- }
-
- /**
- * @deprecated use
- * {@link #TokenizerCrossValidator(TrainingParameters, TokenizerFactory, TokenizerEvaluationMonitor...)}
- * instead and pass in a {@link TokenizerFactory}
- */
- public TokenizerCrossValidator(String language,
- boolean alphaNumericOptimization, TrainingParameters params,
- TokenizerEvaluationMonitor ... listeners) {
- this(language, null, alphaNumericOptimization, params, listeners);
- }
-
-
- /**
* Starts the evaluation.
*
* @param samples
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
index f9e789a..ffa793b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java
@@ -37,7 +37,7 @@ public class TokenizerFactory extends BaseToolFactory {
private String languageCode;
private Dictionary abbreviationDictionary;
- private Boolean useAlphaNumericOptimization;
+ private Boolean useAlphaNumericOptimization = false;
private Pattern alphaNumericPattern;
private static final String ABBREVIATIONS_ENTRY_NAME = "abbreviations.dictionary";
@@ -112,13 +112,11 @@ public class TokenizerFactory extends BaseToolFactory {
public Map<String, String> createManifestEntries() {
Map<String, String> manifestEntries = super.createManifestEntries();
- manifestEntries.put(USE_ALPHA_NUMERIC_OPTIMIZATION,
- Boolean.toString(isUseAlphaNumericOptmization()));
+ manifestEntries.put(USE_ALPHA_NUMERIC_OPTIMIZATION, Boolean.toString(isUseAlphaNumericOptmization()));
// alphanumeric pattern is optional
if (getAlphaNumericPattern() != null)
- manifestEntries.put(ALPHA_NUMERIC_PATTERN, getAlphaNumericPattern()
- .pattern());
+ manifestEntries.put(ALPHA_NUMERIC_PATTERN, getAlphaNumericPattern().pattern());
return manifestEntries;
}
@@ -167,9 +165,8 @@ public class TokenizerFactory extends BaseToolFactory {
*/
public Pattern getAlphaNumericPattern() {
if (this.alphaNumericPattern == null) {
- if (artifactProvider != null) {
- String prop = this.artifactProvider
- .getManifestProperty(ALPHA_NUMERIC_PATTERN);
+ if (this.artifactProvider != null) {
+ String prop = this.artifactProvider.getManifestProperty(ALPHA_NUMERIC_PATTERN);
if (prop != null) {
this.alphaNumericPattern = Pattern.compile(prop);
}
@@ -189,8 +186,8 @@ public class TokenizerFactory extends BaseToolFactory {
* @return true if the alpha numeric optimization is enabled, otherwise false
*/
public boolean isUseAlphaNumericOptmization() {
- if (this.useAlphaNumericOptimization == null && artifactProvider != null) {
- this.useAlphaNumericOptimization = Boolean.valueOf(artifactProvider
+ if (artifactProvider != null) {
+ this.useAlphaNumericOptimization = Boolean.valueOf(this.artifactProvider
.getManifestProperty(USE_ALPHA_NUMERIC_OPTIMIZATION));
}
return this.useAlphaNumericOptimization;
@@ -203,8 +200,7 @@ public class TokenizerFactory extends BaseToolFactory {
*/
public Dictionary getAbbreviationDictionary() {
if (this.abbreviationDictionary == null && artifactProvider != null) {
- this.abbreviationDictionary = artifactProvider
- .getArtifact(ABBREVIATIONS_ENTRY_NAME);
+ this.abbreviationDictionary = this.artifactProvider.getArtifact(ABBREVIATIONS_ENTRY_NAME);
}
return this.abbreviationDictionary;
}
@@ -215,7 +211,7 @@ public class TokenizerFactory extends BaseToolFactory {
* @return the language code
*/
public String getLanguageCode() {
- if (this.languageCode == null && artifactProvider != null) {
+ if (this.languageCode == null && this.artifactProvider != null) {
this.languageCode = this.artifactProvider.getLanguage();
}
return this.languageCode;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 4c4c638..491b6fa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -34,7 +34,6 @@ import opennlp.tools.tokenize.lang.Factory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelUtil;
/**
* A Tokenizer for converting raw text into separated tokens. It uses
@@ -254,94 +253,6 @@ public class TokenizerME extends AbstractTokenizer {
}
/**
- * Trains a model for the {@link TokenizerME}.
- *
- * @param languageCode the language of the natural text
- * @param samples the samples used for the training.
- * @param useAlphaNumericOptimization - if true alpha numerics are skipped
- * @param mlParams the machine learning train parameters
- *
- * @return the trained {@link TokenizerModel}
- *
- * @throws IOException it throws an {@link IOException} if an {@link IOException}
- * is thrown during IO operations on a temp file which is created during training.
- * Or if reading from the {@link ObjectStream} fails.
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
- * and pass in a {@link TokenizerFactory}
- */
- public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
- boolean useAlphaNumericOptimization, TrainingParameters mlParams) throws IOException {
- return train(languageCode, samples, null, useAlphaNumericOptimization,
- mlParams);
- }
-
- /**
- * Trains a model for the {@link TokenizerME}.
- *
- * @param languageCode the language of the natural text
- * @param samples the samples used for the training.
- * @param abbreviations an abbreviations dictionary
- * @param useAlphaNumericOptimization - if true alpha numerics are skipped
- * @param mlParams the machine learning train parameters
- *
- * @return the trained {@link TokenizerModel}
- *
- * @throws IOException it throws an {@link IOException} if an {@link IOException}
- * is thrown during IO operations on a temp file which is created during training.
- * Or if reading from the {@link ObjectStream} fails.
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
- * and pass in a {@link TokenizerFactory}
- */
- public static TokenizerModel train(String languageCode,
- ObjectStream<TokenSample> samples, Dictionary abbreviations,
- boolean useAlphaNumericOptimization, TrainingParameters mlParams)
- throws IOException {
- Factory factory = new Factory();
-
- Map<String, String> manifestInfoEntries = new HashMap<>();
-
- ObjectStream<Event> eventStream = new TokSpanEventStream(samples,
- useAlphaNumericOptimization, factory.getAlphanumeric(languageCode),
- factory.createTokenContextGenerator(languageCode,
- getAbbreviations(abbreviations)));
-
- EventTrainer trainer = TrainerFactory.getEventTrainer(
- mlParams.getSettings(), manifestInfoEntries);
-
- MaxentModel maxentModel = trainer.train(eventStream);
-
- return new TokenizerModel(languageCode, maxentModel, abbreviations,
- useAlphaNumericOptimization, manifestInfoEntries);
- }
-
-
- /**
- * Trains a model for the {@link TokenizerME} with a default cutoff of 5 and 100 iterations.
- *
- * @param languageCode the language of the natural text
- * @param samples the samples used for the training.
- * @param useAlphaNumericOptimization - if true alpha numerics are skipped
- *
- * @return the trained {@link TokenizerModel}
- *
- * @throws IOException it throws an {@link IOException} if an {@link IOException}
- * is thrown during IO operations on a temp file which is
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
- * and pass in a {@link TokenizerFactory}
- */
- public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
- boolean useAlphaNumericOptimization) throws IOException {
- return train(samples, TokenizerFactory.create(null, languageCode, null, useAlphaNumericOptimization, null),
- ModelUtil.createDefaultTrainingParameters());
- }
-
- /**
* Returns the value of the alpha-numeric optimization flag.
*
* @return true if the tokenizer should use alpha-numeric optimization, false otherwise.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
index e63b946..ed84b4e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
@@ -66,59 +66,6 @@ public final class TokenizerModel extends BaseModel {
/**
* Initializes the current instance.
*
- * @param language the language the tokenizer should use
- * @param tokenizerMaxentModel the statistical model of the tokenizer
- * @param abbreviations the dictionary containing the abbreviations
- * @param useAlphaNumericOptimization if true alpha numeric optimization is enabled, otherwise not
- * @param manifestInfoEntries the additional meta data which should be written into manifest
- *
- * @deprecated Use
- * {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
- * instead and pass in a {@link TokenizerFactory}.
- */
- public TokenizerModel(String language, MaxentModel tokenizerMaxentModel,
- Dictionary abbreviations, boolean useAlphaNumericOptimization,
- Map<String, String> manifestInfoEntries) {
- this(tokenizerMaxentModel, manifestInfoEntries,
- new TokenizerFactory(language, abbreviations, useAlphaNumericOptimization, null));
- }
-
- /**
- * Initializes the current instance.
- *
- * @param language the language the tokenizer should use
- * @param tokenizerMaxentModel the statistical model of the tokenizer
- * @param useAlphaNumericOptimization if true alpha numeric optimization is enabled, otherwise not
- * @param manifestInfoEntries the additional meta data which should be written into manifest
- *
- * @deprecated Use
- * {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
- * instead and pass in a {@link TokenizerFactory}.
- */
- public TokenizerModel(String language, AbstractModel tokenizerMaxentModel,
- boolean useAlphaNumericOptimization, Map<String, String> manifestInfoEntries) {
- this(language, tokenizerMaxentModel, null, useAlphaNumericOptimization, manifestInfoEntries);
- }
-
- /**
- * Initializes the current instance.
- *
- * @param language the language the tokenizer should use
- * @param tokenizerMaxentModel the statistical model of the tokenizer
- * @param useAlphaNumericOptimization if true alpha numeric optimization is enabled, otherwise not
- *
- * @deprecated Use
- * {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
- * instead and pass in a {@link TokenizerFactory}.
- */
- public TokenizerModel(String language, AbstractModel tokenizerMaxentModel,
- boolean useAlphaNumericOptimization) {
- this(language, tokenizerMaxentModel, useAlphaNumericOptimization, null);
- }
-
- /**
- * Initializes the current instance.
- *
* @param in the Input Stream to load the model from
*
* @throws IOException if reading from the stream fails in anyway
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c49a87ab/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
index 3b8e060..0ff3002 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
@@ -59,8 +59,7 @@ public class Conll00ChunkerEval {
double expectedFMeasure) throws IOException {
ObjectStream<ChunkSample> samples = new ChunkSampleStream(
- new PlainTextByLineStream(new MarkableFileInputStreamFactory(testData),
- "UTF-8"));
+ new PlainTextByLineStream(new MarkableFileInputStreamFactory(testData),"UTF-8"));
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
evaluator.evaluate(samples);