You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/11/22 00:40:29 UTC
opennlp git commit: Remove deprecated API from the POS Tagger
Repository: opennlp
Updated Branches:
refs/heads/trunk c657cdeda -> 639b9f0ae
Remove deprecated API from the POS Tagger
See issue OPENNLP-883
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/639b9f0a
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/639b9f0a
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/639b9f0a
Branch: refs/heads/trunk
Commit: 639b9f0ae2200d7a7365eeb43227ca067330bc83
Parents: c657cde
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Nov 22 01:39:50 2016 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Nov 22 01:39:50 2016 +0100
----------------------------------------------------------------------
.../opennlp/tools/postag/POSDictionary.java | 76 --------------
.../java/opennlp/tools/postag/POSModel.java | 55 ----------
.../java/opennlp/tools/postag/POSTagger.java | 29 -----
.../tools/postag/POSTaggerCrossValidator.java | 35 -------
.../java/opennlp/tools/postag/POSTaggerME.java | 105 +------------------
.../opennlp/tools/postag/POSTaggerMETest.java | 9 +-
.../java/opennlp/uima/postag/POSTagger.java | 8 +-
.../opennlp/uima/postag/POSTaggerTrainer.java | 22 ++--
8 files changed, 23 insertions(+), 316 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index 7904d83..ef237a8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -65,82 +65,6 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary {
}
/**
- * Creates a tag dictionary with contents of specified file.
- *
- * @param file The file name for the tag dictionary.
- *
- * @throws IOException when the specified file can not be read.
- *
- * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed.
- */
- @Deprecated
- public POSDictionary(String file) throws IOException {
- this(file, null, true);
- }
-
- /**
- * Creates a tag dictionary with contents of specified file and using specified
- * case to determine how to access entries in the tag dictionary.
- *
- * @param file The file name for the tag dictionary.
- * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not.
- *
- * @throws IOException when the specified file can not be read.
- *
- * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed.
- */
- @Deprecated
- public POSDictionary(String file, boolean caseSensitive) throws IOException {
- this(file, null, caseSensitive);
- }
-
-
- /**
- * Creates a tag dictionary with contents of specified file and using specified case to determine how to access entries in the tag dictionary.
- *
- * @param file The file name for the tag dictionary.
- * @param encoding The encoding of the tag dictionary file.
- * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not.
- *
- * @throws IOException when the specified file can not be read.
- *
- * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed.
- */
- @Deprecated
- public POSDictionary(String file, String encoding, boolean caseSensitive) throws IOException {
- this(new BufferedReader(encoding == null ? new FileReader(file) : new InputStreamReader(new FileInputStream(file),encoding)), caseSensitive);
- }
-
- /**
- * Create tag dictionary object with contents of specified file and using specified case to determine how to access entries in the tag dictionary.
- *
- * @param reader A reader for the tag dictionary.
- * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not.
- *
- * @throws IOException when the specified file can not be read.
- *
- * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed.
- */
- @Deprecated
- public POSDictionary(BufferedReader reader, boolean caseSensitive) throws IOException {
- dictionary = new HashMap<String, String[]>();
- this.caseSensitive = caseSensitive;
- for (String line = reader.readLine(); line != null; line = reader.readLine()) {
- String[] parts = line.split(" ");
- String[] tags = new String[parts.length - 1];
- for (int ti = 0, tl = parts.length - 1; ti < tl; ti++) {
- tags[ti] = parts[ti + 1];
- }
- if (caseSensitive) {
- dictionary.put(parts[0], tags);
- }
- else {
- dictionary.put(StringUtil.toLowerCase(parts[0]), tags);
- }
- }
- }
-
- /**
* Returns a list of valid tags for the specified word.
*
* @param word The word.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
index 446d1e6..34b9f79 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
@@ -45,29 +45,6 @@ public final class POSModel extends BaseModel {
public static final String POS_MODEL_ENTRY_NAME = "pos.model";
- /**
- * @deprecated Use
- * {@link #POSModel(String, MaxentModel, Map, POSTaggerFactory)}
- * instead.
- */
- public POSModel(String languageCode, MaxentModel posModel,
- POSDictionary tagDictionary, Dictionary ngramDict, Map<String, String> manifestInfoEntries) {
-
- this(languageCode, posModel, manifestInfoEntries, new POSTaggerFactory(
- ngramDict, tagDictionary));
- }
-
- /**
- * @deprecated Use
- * {@link #POSModel(String, MaxentModel, Map, POSTaggerFactory)}
- * instead.
- */
- public POSModel(String languageCode, MaxentModel posModel,
- POSDictionary tagDictionary, Dictionary ngramDict) {
- this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, null, new POSTaggerFactory(ngramDict,
- tagDictionary));
- }
-
public POSModel(String languageCode, SequenceClassificationModel<String> posModel,
Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
@@ -139,7 +116,6 @@ public final class POSModel extends BaseModel {
* @deprecated use getPosSequenceModel instead. This method will be removed soon.
*/
@Deprecated
-
public MaxentModel getPosModel() {
if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) {
return (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME);
@@ -171,37 +147,6 @@ public final class POSModel extends BaseModel {
}
}
- /**
- * Retrieves the tag dictionary.
- *
- * @return tag dictionary or null if not used
- *
- * @deprecated Use {@link POSModel#getFactory()} to get a
- * {@link POSTaggerFactory} and
- * {@link POSTaggerFactory#getTagDictionary()} to get a
- * {@link TagDictionary}.
- *
- * @throws IllegalStateException
- * if the TagDictionary is not an instance of POSDictionary
- */
- public POSDictionary getTagDictionary() {
- if (getFactory() != null) {
- TagDictionary dict = getFactory().getTagDictionary();
- if (dict != null) {
- if (dict instanceof POSDictionary) {
- return (POSDictionary) dict;
- }
- String clazz = dict.getClass().getCanonicalName();
- throw new IllegalStateException("Can not get a dictionary of type "
- + clazz
- + " using the deprecated method POSModel.getTagDictionary() "
- + "because it can only return dictionaries of type POSDictionary. "
- + "Use POSModel.getFactory().getTagDictionary() instead.");
- }
- }
- return null;
- }
-
public POSTaggerFactory getFactory() {
return (POSTaggerFactory) this.toolFactory;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java
index 3cfc522..f081916 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java
@@ -28,18 +28,6 @@ public interface POSTagger {
/**
* Assigns the sentence of tokens pos tags.
- *
- * @param sentence
- * The sentence of tokens to be tagged.
- * @return a list of pos tags for each token provided in sentence.
- *
- * @deprecated call <code> tag(String[]) </code> instead
- */
- @Deprecated
- public List<String> tag(List<String> sentence);
-
- /**
- * Assigns the sentence of tokens pos tags.
* @param sentence The sentece of tokens to be tagged.
* @return an array of pos tags for each token provided in sentence.
*/
@@ -47,23 +35,6 @@ public interface POSTagger {
public String[] tag(String[] sentence, Object[] additionaContext);
- /**
- * Assigns the sentence of space-delimied tokens pos tags.
- * @param sentence The sentece of space-delimited tokens to be tagged.
- * @return a string of space-delimited pos tags for each token provided in sentence.
- *
- * @deprecated call <code> tag(String[]) instead </code> use WhiteSpaceTokenizer.INSTANCE.tokenize
- * to obtain the String array.
- */
- @Deprecated
- public String tag(String sentence);
-
- /**
- * @deprecated call <code> topKSequences(String[]) </code> instead
- */
- @Deprecated
- public Sequence[] topKSequences(List<String> sentence);
-
public Sequence[] topKSequences(String[] sentence);
public Sequence[] topKSequences(String[] sentence, Object[] additionaContext);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
index c767268..27854dc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
@@ -79,41 +79,6 @@ public class POSTaggerCrossValidator {
}
/**
- * @deprecated use
- * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)}
- * instead and pass in a {@link POSTaggerFactory}.
- */
- public POSTaggerCrossValidator(String languageCode,
- TrainingParameters trainParam, POSDictionary tagDictionary,
- POSTaggerEvaluationMonitor... listeners) {
- this(languageCode, trainParam, create(null, tagDictionary), listeners);
- }
-
- /**
- * @deprecated use
- * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)}
- * instead and pass in the name of {@link POSTaggerFactory}
- * sub-class.
- */
- public POSTaggerCrossValidator(String languageCode,
- TrainingParameters trainParam, POSDictionary tagDictionary,
- Integer ngramCutoff, POSTaggerEvaluationMonitor... listeners) {
- this(languageCode, trainParam, create(null, tagDictionary), listeners);
- this.ngramCutoff = ngramCutoff;
- }
-
- /**
- * @deprecated use
- * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)}
- * instead and pass in a {@link POSTaggerFactory}.
- */
- public POSTaggerCrossValidator(String languageCode,
- TrainingParameters trainParam, POSDictionary tagDictionary,
- Dictionary ngramDictionary, POSTaggerEvaluationMonitor... listeners) {
- this(languageCode, trainParam, create(ngramDictionary, tagDictionary), listeners);
- }
-
- /**
* Starts the evaluation.
*
* @param samples
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
index e2e5188..e4c1c1b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
@@ -90,39 +90,7 @@ public class POSTaggerME implements POSTagger {
private SequenceValidator<String> sequenceValidator;
/**
- * Initializes the current instance with the provided
- * model and provided beam size.
- *
- * @param model
- * @param beamSize
- *
- * @deprecated the beam size should be specified in the params during training
- */
- @Deprecated
- public POSTaggerME(POSModel model, int beamSize, int cacheSize) {
- POSTaggerFactory factory = model.getFactory();
-
- modelPackage = model;
-
- // TODO: Why is this the beam size?! not cache size?
- contextGen = factory.getPOSContextGenerator(beamSize);
- tagDictionary = factory.getTagDictionary();
- size = beamSize;
-
- sequenceValidator = factory.getSequenceValidator();
-
- if (model.getPosSequenceModel() != null) {
- this.model = model.getPosSequenceModel();
- }
- else {
- this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
- model.getPosModel(), cacheSize);
- }
- }
-
- /**
- * Initializes the current instance with the provided model
- * and the default beam size of 3.
+ * Initializes the current instance with the provided model.
*
* @param model
*/
@@ -156,21 +124,6 @@ public class POSTaggerME implements POSTagger {
}
/**
- * Returns the number of different tags predicted by this model.
- *
- * @return the number of different tags predicted by this model.
- * @deprecated use getAllPosTags instead!
- */
- @Deprecated
- public int getNumTags() {
-
- // TODO: Lets discuss on the dev list how to do this properly!
- // Nobody needs the number of tags, if the tags are not available.
-
- return model.getOutcomes().length;
- }
-
- /**
* Retrieves an array of all possible part-of-speech tags from the
* tagger.
*
@@ -180,12 +133,6 @@ public class POSTaggerME implements POSTagger {
return model.getOutcomes();
}
- @Deprecated
- public List<String> tag(List<String> sentence) {
- bestSequence = model.bestSequence(sentence.toArray(new String[sentence.size()]), null, contextGen, sequenceValidator);
- return bestSequence.getOutcomes();
- }
-
public String[] tag(String[] sentence) {
return this.tag(sentence, null);
}
@@ -215,12 +162,6 @@ public class POSTaggerME implements POSTagger {
return tags;
}
- @Deprecated
- public Sequence[] topKSequences(List<String> sentence) {
- return model.bestSequences(size, sentence.toArray(new String[sentence.size()]), null,
- contextGen, sequenceValidator);
- }
-
public Sequence[] topKSequences(String[] sentence) {
return this.topKSequences(sentence, null);
}
@@ -247,19 +188,6 @@ public class POSTaggerME implements POSTagger {
return bestSequence.getProbs();
}
- @Deprecated
- public String tag(String sentence) {
- List<String> toks = new ArrayList<String>();
- StringTokenizer st = new StringTokenizer(sentence);
- while (st.hasMoreTokens())
- toks.add(st.nextToken());
- List<String> tags = tag(toks);
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < tags.size(); i++)
- sb.append(toks.get(i) + "/" + tags.get(i) + " ");
- return sb.toString().trim();
- }
-
public String[] getOrderedTags(List<String> words, List<String> tags, int index) {
return getOrderedTags(words,tags,index,null);
}
@@ -349,37 +277,6 @@ public class POSTaggerME implements POSTagger {
}
}
- /**
- * @deprecated use
- * {@link #train(String, ObjectStream, TrainingParameters, POSTaggerFactory)}
- * instead and pass in a {@link POSTaggerFactory}.
- */
- public static POSModel train(String languageCode, ObjectStream<POSSample> samples, TrainingParameters trainParams,
- POSDictionary tagDictionary, Dictionary ngramDictionary) throws IOException {
-
- return train(languageCode, samples, trainParams, new POSTaggerFactory(
- ngramDictionary, tagDictionary));
- }
-
- /**
- * @deprecated use
- * {@link #train(String, ObjectStream, TrainingParameters, POSTaggerFactory)}
- * instead and pass in a {@link POSTaggerFactory} and a
- * {@link TrainingParameters}.
- */
- @Deprecated
- public static POSModel train(String languageCode, ObjectStream<POSSample> samples, ModelType modelType, POSDictionary tagDictionary,
- Dictionary ngramDictionary, int cutoff, int iterations) throws IOException {
-
- TrainingParameters params = new TrainingParameters();
-
- params.put(TrainingParameters.ALGORITHM_PARAM, modelType.toString());
- params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iterations));
- params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cutoff));
-
- return train(languageCode, samples, params, tagDictionary, ngramDictionary);
- }
-
public static Dictionary buildNGramDictionary(ObjectStream<POSSample> samples, int cutoff)
throws IOException {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
index 1d99687..996b233 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals;
import java.io.IOException;
+import opennlp.tools.util.TrainingParameters;
import org.junit.Test;
import opennlp.tools.formats.ResourceAsStreamFactory;
@@ -50,8 +51,12 @@ public class POSTaggerMETest {
* @throws IOException
*/
static POSModel trainPOSModel(ModelType type) throws IOException {
- // TODO: also use tag dictionary for training
- return POSTaggerME.train("en", createSampleStream(), type, null, null, 5, 100);
+ TrainingParameters params = new TrainingParameters();
+ params.put(TrainingParameters.ALGORITHM_PARAM, type.toString());
+ params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
+ params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5));
+
+ return POSTaggerME.train("en", createSampleStream(), params, new POSTaggerFactory());
}
@Test
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
index 5e77e9d..2fdc47c 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
@@ -17,6 +17,7 @@
package opennlp.uima.postag;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -127,7 +128,7 @@ public final class POSTagger extends CasAnnotator_ImplBase {
if (beamSize == null)
beamSize = POSTaggerME.DEFAULT_BEAM_SIZE;
- this.posTagger = new POSTaggerME(model, beamSize, 0);
+ this.posTagger = new POSTaggerME(model);
}
/**
@@ -174,7 +175,8 @@ public final class POSTagger extends CasAnnotator_ImplBase {
sentenceTokenList.add(tokenAnnotation.getCoveredText());
}
- final List<String> posTags = this.posTagger.tag(sentenceTokenList);
+ final List<String> posTags = Arrays.asList(this.posTagger.tag(
+ sentenceTokenList.toArray(new String[sentenceTokenList.size()])));
double posProbabilities[] = null;
@@ -231,4 +233,4 @@ public final class POSTagger extends CasAnnotator_ImplBase {
public void destroy() {
this.posTagger = null;
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
index 9f377be..e9bb048 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
@@ -27,11 +27,9 @@ import java.util.Iterator;
import java.util.List;
import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.postag.POSDictionary;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.postag.*;
import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelType;
import opennlp.uima.util.AnnotatorUtil;
import opennlp.uima.util.CasConsumerUtil;
@@ -116,12 +114,8 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase {
TAG_DICTIONARY_NAME);
if (tagDictionaryName != null) {
- try {
- InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName);
-
- // TODO: ask Tom if case sensitivity must be configureable
- tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false);
-
+ try (InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName)) {
+ tagDictionary = POSDictionary.create(dictIn);
} catch (final IOException e) {
// if this fails just print error message and continue
final String message = "IOException during tag dictionary reading, "
@@ -207,9 +201,13 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase {
GIS.PRINT_MESSAGES = false;
+ TrainingParameters params = new TrainingParameters();
+ params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
+ params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5));
+
POSModel posTaggerModel = POSTaggerME.train(language,
ObjectStreamUtils.createObjectStream(mPOSSamples),
- ModelType.MAXENT, tagDictionary, null, 100, 5);
+ params, new POSTaggerFactory(null, tagDictionary));
// dereference to allow garbage collection
mPOSSamples = null;
@@ -234,4 +232,4 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase {
// dereference to allow garbage collection
mPOSSamples = null;
}
-}
\ No newline at end of file
+}