You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/26 13:34:43 UTC
[49/50] [abbrv] opennlp git commit: OPENNLP-901: Replace references to
deprecated NameFinderME.train(), this closes apache/opennlp#21
OPENNLP-901: Replace references to deprecated NameFinderME.train(), this closes apache/opennlp#21
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/dfbf6148
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/dfbf6148
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/dfbf6148
Branch: refs/heads/889
Commit: dfbf61485179f5c020a4f4a5e57db61451ad3c37
Parents: f781fca
Author: smarthi <sm...@apache.org>
Authored: Sun Dec 25 14:25:12 2016 -0500
Committer: smarthi <sm...@apache.org>
Committed: Sun Dec 25 14:25:12 2016 -0500
----------------------------------------------------------------------
.../java/opennlp/morfologik/cmdline/CLI.java | 4 +-
.../TokenNameFinderCrossValidatorTool.java | 4 +-
.../namefind/TokenNameFinderTrainerTool.java | 11 +-
.../opennlp/tools/namefind/NameFinderME.java | 141 +------------------
.../namefind/TokenNameFinderCrossValidator.java | 13 +-
.../tools/namefind/TokenNameFinderFactory.java | 38 +++--
.../tools/namefind/TokenNameFinderModel.java | 64 +--------
.../opennlp/tools/postag/POSTaggerFactory.java | 13 +-
.../tools/namefind/NameFinderMETest.java | 46 +++---
.../TokenNameFinderCrossValidatorTest.java | 13 +-
10 files changed, 68 insertions(+), 279 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
index f92d178..5205739 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -40,9 +40,9 @@ public final class CLI {
private static Map<String, CmdLineTool> toolLookupMap;
static {
- toolLookupMap = new LinkedHashMap<String, CmdLineTool>();
+ toolLookupMap = new LinkedHashMap<>();
- List<CmdLineTool> tools = new LinkedList<CmdLineTool>();
+ List<CmdLineTool> tools = new LinkedList<>();
tools.add(new MorfologikDictionaryBuilderTool());
tools.add(new XMLDictionaryToTableTool());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 93f52ec..aa1e343 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -73,7 +73,7 @@ public final class TokenNameFinderCrossValidatorTool
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
- List<EvaluationMonitor<NameSample>> listeners = new LinkedList<EvaluationMonitor<NameSample>>();
+ List<EvaluationMonitor<NameSample>> listeners = new LinkedList<>();
if (params.getMisclassified()) {
listeners.add(new NameEvaluationErrorListener());
}
@@ -94,7 +94,7 @@ public final class TokenNameFinderCrossValidatorTool
SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
- TokenNameFinderFactory nameFinderFactory = null;
+ TokenNameFinderFactory nameFinderFactory;
try {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
featureGeneratorBytes, resources, sequenceCodec);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index 1f8a365..b2ccfc5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -89,13 +89,13 @@ public final class TokenNameFinderTrainerTool
* @return a map consisting of the file name of the resource and its corresponding Object
*/
public static Map<String, Object> loadResources(File resourcePath, File featureGenDescriptor) {
- Map<String, Object> resources = new HashMap<String, Object>();
+ Map<String, Object> resources = new HashMap<>();
if (resourcePath != null) {
Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel
.createArtifactSerializers();
- List<Element> elements = new ArrayList<Element>();
+ List<Element> elements = new ArrayList<>();
ArtifactSerializer serializer = null;
@@ -134,9 +134,6 @@ public final class TokenNameFinderTrainerTool
try (InputStream resourceIn = CmdLineUtil.openInFile(resourceFile)) {
resources.put(resourceName, serializer.create(resourceIn));
- } catch (InvalidFormatException e) {
- // TODO: Fix exception handling
- e.printStackTrace();
} catch (IOException e) {
// TODO: Fix exception handling
e.printStackTrace();
@@ -160,7 +157,7 @@ public final class TokenNameFinderTrainerTool
return loadResources(resourcePath, featureGeneratorDescriptor);
}
- return new HashMap<String, Object>();
+ return new HashMap<>();
}
public void run(String format, String[] args) {
@@ -200,7 +197,7 @@ public final class TokenNameFinderTrainerTool
SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
- TokenNameFinderFactory nameFinderFactory = null;
+ TokenNameFinderFactory nameFinderFactory;
try {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
featureGeneratorBytes, resources, sequenceCodec);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index ff8c143..69f7a4e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.EventModelSequenceTrainer;
import opennlp.tools.ml.EventTrainer;
@@ -43,14 +44,7 @@ import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
-import opennlp.tools.util.featuregen.BigramNameFeatureGenerator;
-import opennlp.tools.util.featuregen.CachedFeatureGenerator;
import opennlp.tools.util.featuregen.GeneratorFactory;
-import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator;
-import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator;
-import opennlp.tools.util.featuregen.SentenceFeatureGenerator;
-import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
-import opennlp.tools.util.featuregen.TokenFeatureGenerator;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
/**
@@ -91,21 +85,6 @@ public class NameFinderME implements TokenNameFinder {
new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}
- @Deprecated
- /*
- * @deprecated the default feature generation is now always included in the models and loaded
- * if not by the factory. Subclasses using this methods should do the same.
- */
- static AdaptiveFeatureGenerator createFeatureGenerator() {
- return new CachedFeatureGenerator(
- new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
- new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
- new OutcomePriorFeatureGenerator(),
- new PreviousMapFeatureGenerator(),
- new BigramNameFeatureGenerator(),
- new SentenceFeatureGenerator(true, false));
- }
-
private static AdaptiveFeatureGenerator createFeatureGenerator(
byte[] generatorDescriptor, final Map<String, Object> resources)
throws IOException {
@@ -289,124 +268,6 @@ public class NameFinderME implements TokenNameFinder {
}
/**
- * Trains a name finder model.
- *
- * @param languageCode the language of the training data
- * @param type null or an override type for all types in the training data
- * @param samples the training data
- * @param trainParams machine learning train parameters
- * @param generator null or the feature generator
- * @param resources the resources for the name finder or null if none
- *
- * @return the newly trained model
- *
- * @throws IOException
- * @deprecated use
- * {@link NameFinderME#train(String, String, ObjectStream, TrainingParameters, TokenNameFinderFactory)}
- * instead.
- */
- @Deprecated
- static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples,
- TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources)
- throws IOException {
-
- if (languageCode == null) {
- throw new IllegalArgumentException("languageCode must not be null!");
- }
-
- String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
-
- int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
- if (beamSizeString != null) {
- beamSize = Integer.parseInt(beamSizeString);
- }
-
- Map<String, String> manifestInfoEntries = new HashMap<>();
-
- AdaptiveFeatureGenerator featureGenerator;
-
- if (generator != null) {
- featureGenerator = generator;
- } else {
- featureGenerator = createFeatureGenerator();
- }
-
- MaxentModel nameFinderModel = null;
-
- SequenceClassificationModel<String> seqModel = null;
-
- TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings());
-
- if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
- ObjectStream<Event> eventStream = new NameFinderEventStream(samples, type,
- new DefaultNameContextGenerator(featureGenerator), new BioCodec());
-
- EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(), manifestInfoEntries);
- nameFinderModel = trainer.train(eventStream);
- } else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
- NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, featureGenerator);
-
- EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer(
- trainParams.getSettings(), manifestInfoEntries);
- nameFinderModel = trainer.train(ss);
- } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
- SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
- trainParams.getSettings(), manifestInfoEntries);
-
- NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, featureGenerator, false);
- seqModel = trainer.train(ss);
- } else {
- throw new IllegalStateException("Unexpected trainer type!");
- }
-
- // TODO: Pass the sequence codec down to the model! We will just store the class
- // name in the model, and then always use the extension loader to create it!
- // The cmd line interface, will replace shortcuts with actual class names.
- // depending on which one is not null!
- if (seqModel != null) {
- return new TokenNameFinderModel(languageCode, seqModel, null,
- resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory());
- } else {
- return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, null,
- resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory());
- }
- }
-
- /**
- * Trains a name finder model.
- *
- * @param languageCode the language of the training data
- * @param type null or an override type for all types in the training data
- * @param samples the training data
- * @param trainParams machine learning train parameters
- * @param featureGeneratorBytes descriptor to configure the feature generation
- * or null
- * @param resources the resources for the name finder or null if none
- *
- * @return the newly trained model
- *
- * @throws IOException
- * @deprecated use
- * {@link NameFinderME#train(String, String, ObjectStream, TrainingParameters, TokenNameFinderFactory)}
- * instead.
- */
- @Deprecated
- static TokenNameFinderModel train(String languageCode, String type,
- ObjectStream<NameSample> samples, TrainingParameters trainParams,
- byte[] featureGeneratorBytes, final Map<String, Object> resources)
- throws IOException {
-
- TokenNameFinderModel model = train(languageCode, type, samples, trainParams,
- createFeatureGenerator(featureGeneratorBytes, resources), resources);
-
- if (featureGeneratorBytes != null) {
- model = model.updateFeatureGenerator(featureGeneratorBytes);
- }
-
- return model;
- }
-
- /**
* Gets the name type from the outcome
*
* @param outcome the outcome
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
index 3d2547b..b4ff4e1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
@@ -170,9 +170,7 @@ public class TokenNameFinderCrossValidator {
this.type = type;
this.featureGeneratorBytes = featureGeneratorBytes;
this.resources = resources;
-
this.params = trainParams;
-
this.listeners = listeners;
}
@@ -212,17 +210,16 @@ public class TokenNameFinderCrossValidator {
while (partitioner.hasNext()) {
- CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingSampleStream = partitioner
- .next();
+ CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingSampleStream = partitioner.next();
TokenNameFinderModel model;
if (factory != null) {
- model = opennlp.tools.namefind.NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, factory);
+ model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream),
+ params, factory);
}
else {
- model = opennlp.tools.namefind.NameFinderME.train(languageCode, type,
- new DocumentToNameSampleStream(trainingSampleStream), params, featureGeneratorBytes, resources);
-
+ model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream),
+ params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
}
// do testing
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
index 90381ff..55f1ab6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
@@ -30,12 +30,19 @@ import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.ext.ExtensionLoader;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
-import opennlp.tools.util.featuregen.FeatureGeneratorResourceProvider;
+import opennlp.tools.util.featuregen.BigramNameFeatureGenerator;
+import opennlp.tools.util.featuregen.CachedFeatureGenerator;
import opennlp.tools.util.featuregen.GeneratorFactory;
+import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator;
+import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator;
+import opennlp.tools.util.featuregen.SentenceFeatureGenerator;
+import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
+import opennlp.tools.util.featuregen.TokenFeatureGenerator;
+import opennlp.tools.util.featuregen.WindowFeatureGenerator;
// Idea of this factory is that most resources/impls used by the name finder
// can be modified through this class!
-// That only works if thats the central class used for training/runtime
+// That only works if that's the central class used for training/runtime
public class TokenNameFinderFactory extends BaseToolFactory {
@@ -52,7 +59,7 @@ public class TokenNameFinderFactory extends BaseToolFactory {
}
public TokenNameFinderFactory(byte[] featureGeneratorBytes, final Map<String, Object> resources,
- SequenceCodec<String> seqCodec) {
+ SequenceCodec<String> seqCodec) {
init(featureGeneratorBytes, resources, seqCodec);
}
@@ -142,7 +149,13 @@ public class TokenNameFinderFactory extends BaseToolFactory {
AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators();
if (featureGenerator == null) {
- featureGenerator = NameFinderME.createFeatureGenerator();
+ featureGenerator = new CachedFeatureGenerator(
+ new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
+ new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
+ new OutcomePriorFeatureGenerator(),
+ new PreviousMapFeatureGenerator(),
+ new BigramNameFeatureGenerator(),
+ new SentenceFeatureGenerator(true, false));
}
return new DefaultNameContextGenerator(featureGenerator);
@@ -160,7 +173,7 @@ public class TokenNameFinderFactory extends BaseToolFactory {
public AdaptiveFeatureGenerator createFeatureGenerators() {
if (featureGeneratorBytes == null && artifactProvider != null) {
- featureGeneratorBytes = (byte[]) artifactProvider.getArtifact(
+ featureGeneratorBytes = artifactProvider.getArtifact(
TokenNameFinderModel.GENERATOR_DESCRIPTOR_ENTRY_NAME);
}
@@ -172,15 +185,12 @@ public class TokenNameFinderFactory extends BaseToolFactory {
AdaptiveFeatureGenerator generator;
try {
- generator = GeneratorFactory.create(descriptorIn, new FeatureGeneratorResourceProvider() {
-
- public Object getResource(String key) {
- if (artifactProvider != null) {
- return artifactProvider.getArtifact(key);
- }
- else {
- return resources.get(key);
- }
+ generator = GeneratorFactory.create(descriptorIn, key -> {
+ if (artifactProvider != null) {
+ return artifactProvider.getArtifact(key);
+ }
+ else {
+ return resources.get(key);
}
});
} catch (InvalidFormatException e) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index a4780f5..ea2db50 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -23,18 +23,14 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
-import java.util.Collections;
import java.util.Map;
import java.util.Properties;
-
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceCodec;
-import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
-import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
import opennlp.tools.util.featuregen.BrownCluster;
import opennlp.tools.util.featuregen.WordClusterDictionary;
import opennlp.tools.util.model.ArtifactSerializer;
@@ -58,9 +54,7 @@ public class TokenNameFinderModel extends BaseModel {
private static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
- public byte[] create(InputStream in) throws IOException,
- InvalidFormatException {
-
+ public byte[] create(InputStream in) throws IOException {
return ModelUtil.read(in);
}
@@ -155,20 +149,6 @@ public class TokenNameFinderModel extends BaseModel {
checkArtifactMap();
}
- /**
- * @deprecated use getNameFinderSequenceModel instead. This method will be removed soon.
- */
- @Deprecated
- public MaxentModel getNameFinderModel() {
-
- if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
- return (MaxentModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
- }
- else {
- return null;
- }
- }
-
public SequenceClassificationModel<String> getNameFinderSequenceModel() {
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
@@ -200,46 +180,6 @@ public class TokenNameFinderModel extends BaseModel {
return (TokenNameFinderFactory) this.toolFactory;
}
- // TODO: This should be moved to the NameFinderFactory ... !!!
- // Lets deprecate it!
-
- /**
- * Creates the {@link AdaptiveFeatureGenerator}. Usually this
- * is a set of generators contained in the {@link AggregatedFeatureGenerator}.
- *
- * Note:
- * The generators are created on every call to this method.
- *
- * @return the feature generator or null if there is no descriptor in the model
- * @deprecated use TokenNameFinderFactory.createFeatureGenerators instead!
- */
- @Deprecated
- public AdaptiveFeatureGenerator createFeatureGenerators() {
- return getFactory().createFeatureGenerators();
- }
-
- public TokenNameFinderModel updateFeatureGenerator(byte descriptor[]) {
-
- TokenNameFinderModel model;
-
- if (getNameFinderModel() != null) {
- model = new TokenNameFinderModel(getLanguage(), getNameFinderModel(), 1,
- descriptor, Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(),
- getFactory().createSequenceCodec(), getFactory());
- }
- else {
- model = new TokenNameFinderModel(getLanguage(), getNameFinderSequenceModel(),
- descriptor, Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(),
- getFactory().createSequenceCodec(), getFactory());
- }
-
- model.artifactMap.clear();
- model.artifactMap.putAll(artifactMap);
- model.artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, descriptor);
-
- return model;
- }
-
@Override
protected void createArtifactSerializers(Map<String, ArtifactSerializer> serializers) {
super.createArtifactSerializers(serializers);
@@ -276,7 +216,7 @@ public class TokenNameFinderModel extends BaseModel {
return serializers;
}
- boolean isModelValid(MaxentModel model) {
+ private boolean isModelValid(MaxentModel model) {
String outcomes[] = new String[model.getNumOutcomes()];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index 630edc4..6115994 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -19,7 +19,6 @@ package opennlp.tools.postag;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -27,7 +26,6 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.util.BaseToolFactory;
@@ -95,12 +93,12 @@ public class POSTaggerFactory extends BaseToolFactory {
}
public TagDictionary createTagDictionary(File dictionary)
- throws InvalidFormatException, FileNotFoundException, IOException {
+ throws IOException {
return createTagDictionary(new FileInputStream(dictionary));
}
public TagDictionary createTagDictionary(InputStream in)
- throws InvalidFormatException, IOException {
+ throws IOException {
return POSDictionary.create(in);
}
@@ -146,8 +144,7 @@ public class POSTaggerFactory extends BaseToolFactory {
static class POSDictionarySerializer implements ArtifactSerializer<POSDictionary> {
- public POSDictionary create(InputStream in) throws IOException,
- InvalidFormatException {
+ public POSDictionary create(InputStream in) throws IOException {
return POSDictionary.create(new UncloseableInputStream(in));
}
@@ -164,13 +161,13 @@ public class POSTaggerFactory extends BaseToolFactory {
protected void validatePOSDictionary(POSDictionary posDict,
AbstractModel posModel) throws InvalidFormatException {
- Set<String> dictTags = new HashSet<String>();
+ Set<String> dictTags = new HashSet<>();
for (String word : posDict) {
Collections.addAll(dictTags, posDict.getTags(word));
}
- Set<String> modelTags = new HashSet<String>();
+ Set<String> modelTags = new HashSet<>();
for (int i = 0; i < posModel.getNumOutcomes(); i++) {
modelTags.add(posModel.getOutcome(i));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index cc8b0d1..e55cc17 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -18,21 +18,19 @@
package opennlp.tools.namefind;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
import java.io.InputStream;
import java.util.Collections;
-
-import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
-
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
/**
* This is the test class for {@link NameFinderME}.
* <p>
@@ -74,7 +72,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
@@ -116,8 +114,6 @@ public class NameFinderMETest {
/**
* Train NamefinderME using AnnotatedSentencesWithTypes.txt with "person"
* nameType and try the model in a sample text.
- *
- * @throws Exception
*/
@Test
public void testNameFinderWithTypes() throws Exception {
@@ -137,7 +133,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -167,8 +163,6 @@ public class NameFinderMETest {
/**
* Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it.
* This is related to the issue OPENNLP-9
- *
- * @throws Exception
*/
@Test
public void testOnlyWithNames() throws Exception {
@@ -186,7 +180,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -206,8 +200,6 @@ public class NameFinderMETest {
/**
* Train NamefinderME using OnlyWithNamesWithTypes.train. The goal is to check if the model validator accepts it.
* This is related to the issue OPENNLP-9
- *
- * @throws Exception
*/
@Test
public void testOnlyWithNamesWithTypes() throws Exception {
@@ -225,7 +217,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -247,8 +239,6 @@ public class NameFinderMETest {
/**
* Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it.
* This is related to the issue OPENNLP-9
- *
- * @throws Exception
*/
@Test
public void testOnlyWithEntitiesWithTypes() throws Exception {
@@ -266,7 +256,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -283,13 +273,13 @@ public class NameFinderMETest {
}
private boolean hasOtherAsOutcome(TokenNameFinderModel nameFinderModel) {
- MaxentModel model = nameFinderModel.getNameFinderModel();
- for (int i = 0; i < model.getNumOutcomes(); i++) {
- String outcome = model.getOutcome(i);
- if (outcome.equals(NameFinderME.OTHER)) {
- return true;
- }
- }
+ SequenceClassificationModel<String> model = nameFinderModel.getNameFinderSequenceModel();
+ String[] outcomes = model.getOutcomes();
+ for (int i = 0; i < outcomes.length; i++) {
+ if (outcomes[i].equals(NameFinderME.OTHER)) {
+ return true;
+ }
+ }
return false;
}
@@ -304,8 +294,6 @@ public class NameFinderMETest {
/**
* Train NamefinderME using voa1.train with several
* nameTypes and try the model in a sample text.
- *
- * @throws Exception
*/
@Test
public void testNameFinderWithMultipleTypes() throws Exception {
@@ -323,7 +311,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
- params, (byte[]) null, Collections.<String, Object>emptyMap());
+ params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dfbf6148/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
index 22567ee..bc22aa0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
@@ -18,15 +18,10 @@
package opennlp.tools.namefind;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.util.Collections;
import java.util.Map;
-
-import org.junit.Test;
-
import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.util.InputStreamFactory;
@@ -34,13 +29,17 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelType;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
public class TokenNameFinderCrossValidatorTest {
private final String TYPE = null;
@Test
- /**
+ /*
* Test that reproduces jira OPENNLP-463
*/
public void testWithNullResources() throws Exception {
@@ -67,7 +66,7 @@ public class TokenNameFinderCrossValidatorTest {
}
@Test
- /**
+ /*
* Test that tries to reproduce jira OPENNLP-466
*/
public void testWithNameEvaluationErrorListener() throws Exception {