You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/05 16:23:06 UTC
svn commit: r1574524 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ ml/ namefind/ postag/
Author: joern
Date: Wed Mar 5 15:23:06 2014
New Revision: 1574524
URL: http://svn.apache.org/r1574524
Log:
OPENNLP-641 Moved beam search parameter to the training parameters file
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java Wed Mar 5 15:23:06 2014
@@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.SequenceTrainer;
import opennlp.tools.ml.TrainerFactory;
@@ -30,6 +31,7 @@ import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.ml.model.TrainUtil;
+import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.postag.POSSampleSequenceStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Sequence;
@@ -74,12 +76,12 @@ public class ChunkerME implements Chunke
this.sequenceValidator = sequenceValidator;
this.contextGenerator = contextGenerator;
- if (model.getChunkerModel() != null) {
- this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
- model.getChunkerModel(), 0);
+ if (model.getChunkerSequenceModel() != null) {
+ this.model = model.getChunkerSequenceModel();
}
else {
- this.model = model.getChunkerSequenceModel();
+ this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+ model.getChunkerModel(), 0);
}
}
@@ -192,7 +194,14 @@ public class ChunkerME implements Chunke
public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
-
+
+ String beamSizeString = mlParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
+
Map<String, String> manifestInfoEntries = new HashMap<String, String>();
TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings());
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Wed Mar 5 15:23:06 2014
@@ -26,12 +26,15 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Map;
+import java.util.Properties;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.BinaryFileDataReader;
import opennlp.tools.ml.model.GenericModelReader;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.BaseModel;
@@ -53,7 +56,7 @@ public class ChunkerModel extends BaseMo
* instead.
*/
public ChunkerModel(String languageCode, MaxentModel chunkerModel, Map<String, String> manifestInfoEntries) {
- this(languageCode, chunkerModel, manifestInfoEntries, new ChunkerFactory());
+ this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, new ChunkerFactory());
}
public ChunkerModel(String languageCode, SequenceClassificationModel<String> chunkerModel,
@@ -63,11 +66,19 @@ public class ChunkerModel extends BaseMo
checkArtifactMap();
}
-
public ChunkerModel(String languageCode, MaxentModel chunkerModel,
Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
+ this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory);
+ }
+
+ public ChunkerModel(String languageCode, MaxentModel chunkerModel, int beamSize,
+ Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel);
+
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+ manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+
checkArtifactMap();
}
@@ -105,6 +116,10 @@ public class ChunkerModel extends BaseMo
}
}
+ /**
+ * @deprecated use getChunkerSequenceModel instead. This method will be removed soon.
+ */
+ @Deprecated
public MaxentModel getChunkerModel() {
if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof MaxentModel) {
return (MaxentModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
@@ -115,7 +130,20 @@ public class ChunkerModel extends BaseMo
}
public SequenceClassificationModel<String> getChunkerSequenceModel() {
- if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+
+ if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+ String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
+
+ return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME));
+ }
+ else if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
return (SequenceClassificationModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
}
else {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java Wed Mar 5 15:23:06 2014
@@ -39,6 +39,8 @@ import opennlp.tools.util.SequenceValida
*/
public class BeamSearch<T> implements SequenceClassificationModel<T> {
+ public static final String BEAM_SIZE_PARAMETER = "BeamSize";
+
private static final Object[] EMPTY_ADDITIONAL_CONTEXT = new Object[0];
protected int size;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Wed Mar 5 15:23:06 2014
@@ -29,6 +29,7 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.EventModelSequenceTrainer;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.SequenceTrainer;
@@ -305,8 +306,7 @@ public class NameFinderME implements Tok
TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources) throws IOException {
// SequenceCodec seqCodec = new BiolouCodec();
- String beamSizeString = trainParams.getSettings()
- .get(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER);
+ String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
if (beamSizeString != null) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Wed Mar 5 15:23:06 2014
@@ -75,8 +75,6 @@ public class TokenNameFinderModel extend
private static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
- public static final String BEAMSEARCH_BEAM_SIZE_PARAMETER = "BeamSize";
-
public TokenNameFinderModel(String languageCode, SequenceClassificationModel nameFinderModel,
byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries);
@@ -97,9 +95,8 @@ public class TokenNameFinderModel extend
throw new IllegalArgumentException("Model not compatible with name finder!");
}
-
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
- manifest.put(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+ manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
}
@@ -151,10 +148,9 @@ public class TokenNameFinderModel extend
}
/**
- * Retrieves the {@link TokenNameFinder} model.
- *
- * @return the classification model
+ * @deprecated use getNameFinderSequenceModel instead. This method will be removed soon.
*/
+ @Deprecated
public MaxentModel getNameFinderModel() {
if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
@@ -170,7 +166,7 @@ public class TokenNameFinderModel extend
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
- String beamSizeString = manifest.getProperty(BEAMSEARCH_BEAM_SIZE_PARAMETER);
+ String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
if (beamSizeString != null) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Wed Mar 5 15:23:06 2014
@@ -22,11 +22,14 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Map;
+import java.util.Properties;
import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.ArtifactSerializer;
@@ -63,7 +66,7 @@ public final class POSModel extends Base
*/
public POSModel(String languageCode, MaxentModel posModel,
POSDictionary tagDictionary, Dictionary ngramDict) {
- this(languageCode, posModel, null, new POSTaggerFactory(ngramDict,
+ this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, null, new POSTaggerFactory(ngramDict,
tagDictionary));
}
@@ -76,11 +79,17 @@ public final class POSModel extends Base
throw new IllegalArgumentException("The maxentPosModel param must not be null!");
artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
- checkArtifactMap();
+ // TODO: This fails probably for the sequence model ... ?!
+ // checkArtifactMap();
}
-
+
public POSModel(String languageCode, MaxentModel posModel,
Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
+ this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, posFactory);
+ }
+
+ public POSModel(String languageCode, MaxentModel posModel, int beamSize,
+ Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
@@ -125,7 +134,11 @@ public final class POSModel extends Base
}
}
- // TODO: This should be deprecated for the release ...
+ /**
+ * @deprecated use getPosSequenceModel instead. This method will be removed soon.
+ */
+ @Deprecated
+
public MaxentModel getPosModel() {
if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) {
return (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME);
@@ -136,7 +149,20 @@ public final class POSModel extends Base
}
public SequenceClassificationModel<String> getPosSequenceModel() {
- if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+
+ if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+ String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
+
+ return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME));
+ }
+ else if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
return (SequenceClassificationModel) artifactMap.get(POS_MODEL_ENTRY_NAME);
}
else {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java Wed Mar 5 15:23:06 2014
@@ -28,6 +28,7 @@ import java.util.StringTokenizer;
import java.util.concurrent.atomic.AtomicInteger;
import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.EventModelSequenceTrainer;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.SequenceTrainer;
@@ -36,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.T
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.ngram.NGramModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Sequence;
@@ -106,12 +108,12 @@ public class POSTaggerME implements POST
sequenceValidator = factory.getSequenceValidator();
- if (model.getPosModel() != null) {
- this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
- model.getPosModel(), cacheSize);
+ if (model.getPosSequenceModel() != null) {
+ this.model = model.getPosSequenceModel();
}
else {
- this.model = model.getPosSequenceModel();
+ this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+ model.getPosModel(), cacheSize);
}
}
@@ -246,6 +248,13 @@ public class POSTaggerME implements POST
ObjectStream<POSSample> samples, TrainingParameters trainParams,
POSTaggerFactory posFactory) throws IOException {
+ String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
+
POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator();
Map<String, String> manifestInfoEntries = new HashMap<String, String>();