You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/05 13:26:43 UTC
svn commit: r1574453 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind: NameFinderME.java TokenNameFinderModel.java
Author: joern
Date: Wed Mar 5 12:26:43 2014
New Revision: 1574453
URL: http://svn.apache.org/r1574453
Log:
OPENNLP-641 Moved beam search parameter to the training parameters file
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1574453&r1=1574452&r2=1574453&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Wed Mar 5 12:26:43 2014
@@ -90,27 +90,28 @@ public class NameFinderME implements Tok
*
* @param model
* @param beamSize
+ *
+ * @deprecated the beam size is now configured during training time in the trainer parameter
+ * file via beamSearch.beamSize
*/
+ @Deprecated
public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize,
SequenceValidator<String> sequenceValidator) {
this.sequenceValidator = sequenceValidator;
-
- // TODO: The beam size should be stored in the model and passed in during training in the future.
- // At this point no assumption can be made about the underlying sequence classification!
-
+
// TODO: getNameFinderModel should be removed! Instead the model should always return
// a sequence classification model
// To maintain backward compatibility this should be done later, e.g. for 1.7.0
- if (model.getNameFinderModel() != null) {
- this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
- model.getNameFinderModel());
+ if (model.getNameFinderSequenceModel() != null) {
+ this.model = model.getNameFinderSequenceModel();
}
else {
- this.model = model.getNameFinderSequenceModel();
+ this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+ model.getNameFinderModel());
}
-
+
// If generator is provided always use that one
if (generator != null) {
contextGenerator = new DefaultNameContextGenerator(generator);
@@ -132,19 +133,24 @@ public class NameFinderME implements Tok
if (this.sequenceValidator == null)
this.sequenceValidator = new NameFinderSequenceValidator();
- // TODO: Remove this!
- this.sequenceValidator = seqCodec.createSequenceValidator();
+ // TODO: How to combine different sequence validators ?!
-// if (this.model != null) {
-// beam = new BeamSearch<String>(beamSize, contextGenerator, this.model,
-// sequenceValidator, beamSize);
-// }
+ this.sequenceValidator = seqCodec.createSequenceValidator();
}
- public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+ /**
+ * @deprecated the beam size is now configured during training time in the trainer parameter
+ * file via beamSearch.beamSize
+ */
+ @Deprecated public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
this(model, generator, beamSize, null);
}
+ /**
+ * @deprecated the beam size is now configured during training time in the trainer parameter
+ * file via beamSearch.beamSize
+ */
+ @Deprecated
public NameFinderME(TokenNameFinderModel model, int beamSize) {
this(model, null, beamSize);
}
@@ -299,6 +305,13 @@ public class NameFinderME implements Tok
TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources) throws IOException {
// SequenceCodec seqCodec = new BiolouCodec();
+ String beamSizeString = trainParams.getSettings()
+ .get(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
if (languageCode == null) {
throw new IllegalArgumentException("languageCode must not be null!");
@@ -350,7 +363,7 @@ public class NameFinderME implements Tok
resources, manifestInfoEntries);
}
else {
- return new TokenNameFinderModel(languageCode, nameFinderModel,
+ return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, null,
resources, manifestInfoEntries);
}
}
@@ -383,9 +396,7 @@ public class NameFinderME implements Tok
TokenNameFinderModel model = train(languageCode, type, samples, trainParams,
createFeatureGenerator(featureGeneratorBytes, resources), resources);
- // place the descriptor in the model
if (featureGeneratorBytes != null) {
- // TODO: This will not work!!! Method is broken.
model = model.updateFeatureGenerator(featureGeneratorBytes);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1574453&r1=1574452&r2=1574453&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Wed Mar 5 12:26:43 2014
@@ -28,7 +28,9 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.util.InvalidFormatException;
@@ -73,29 +75,41 @@ public class TokenNameFinderModel extend
private static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
+ public static final String BEAMSEARCH_BEAM_SIZE_PARAMETER = "BeamSize";
+
public TokenNameFinderModel(String languageCode, SequenceClassificationModel nameFinderModel,
byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries);
// TODO: Add validation for sequence models!
- // if (!isModelValid(nameFinderModel)) {
+ //if (!isModelValid(nameFinderModel)) {
// throw new IllegalArgumentException("Model not compatible with name finder!");
- // }
+ //}
init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
}
-
- public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
+
+ public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, int beamSize,
byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
-
super(COMPONENT_NAME, languageCode, manifestInfoEntries);
if (!isModelValid(nameFinderModel)) {
throw new IllegalArgumentException("Model not compatible with name finder!");
}
-
+
+
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+ manifest.put(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+
init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
}
+
+ // TODO: Extend this one with beam size!
+ public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
+ byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
+ this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE,
+ generatorDescriptor, resources, manifestInfoEntries);
+ }
public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
@@ -152,7 +166,20 @@ public class TokenNameFinderModel extend
}
public SequenceClassificationModel<String> getNameFinderSequenceModel() {
- if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+
+ if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+ String beamSizeString = manifest.getProperty(BEAMSEARCH_BEAM_SIZE_PARAMETER);
+
+ int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+ if (beamSizeString != null) {
+ beamSize = Integer.parseInt(beamSizeString);
+ }
+
+ return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME));
+ }
+ else if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
return (SequenceClassificationModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
}
else {