You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/05 13:26:43 UTC

svn commit: r1574453 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind: NameFinderME.java TokenNameFinderModel.java

Author: joern
Date: Wed Mar  5 12:26:43 2014
New Revision: 1574453

URL: http://svn.apache.org/r1574453
Log:
OPENNLP-641 Moved beam search parameter to the training parameters file

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1574453&r1=1574452&r2=1574453&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Wed Mar  5 12:26:43 2014
@@ -90,27 +90,28 @@ public class NameFinderME implements Tok
    *
    * @param model
    * @param beamSize
+   * 
+   * @deprecated the beam size is now configured at training time in the training parameters
+   * file via the BeamSize parameter
    */
+  @Deprecated
   public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize,
       SequenceValidator<String> sequenceValidator) {
     
     this.sequenceValidator = sequenceValidator;
-    
-    // TODO: The beam size should be stored in the model and passed in during training in the future.
-    // At this point no assumption can be made about the underlying sequence classification!
-    
+   
     // TODO: getNameFinderModel should be removed! Instead the model should always return
     // a sequence classification model
     // To maintain backward compatibility this should be done later, e.g. for 1.7.0
     
-    if (model.getNameFinderModel() != null) {
-      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
-          model.getNameFinderModel());
+    if (model.getNameFinderSequenceModel() != null) {
+      this.model = model.getNameFinderSequenceModel();
     }
     else {
-      this.model = model.getNameFinderSequenceModel();
+      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+          model.getNameFinderModel());
     }
-
+    
     // If generator is provided always use that one
     if (generator != null) {
       contextGenerator = new DefaultNameContextGenerator(generator);
@@ -132,19 +133,24 @@ public class NameFinderME implements Tok
     if (this.sequenceValidator == null)
       this.sequenceValidator = new NameFinderSequenceValidator();
 
-    // TODO: Remove this!
-    this.sequenceValidator = seqCodec.createSequenceValidator();
+    // TODO: How to combine different sequence validators ?!
     
-//    if (this.model != null) {
-//      beam = new BeamSearch<String>(beamSize, contextGenerator, this.model,
-//          sequenceValidator, beamSize);
-//    }
+    this.sequenceValidator = seqCodec.createSequenceValidator();
   }
 
-  public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+  /**
+   * @deprecated the beam size is now configured at training time in the training parameters
+   * file via the BeamSize parameter
+   */
+  @Deprecated  public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
     this(model, generator, beamSize, null);
   }
 
+  /**
+   * @deprecated the beam size is now configured at training time in the training parameters
+   * file via the BeamSize parameter
+   */
+  @Deprecated
   public NameFinderME(TokenNameFinderModel model, int beamSize) {
     this(model, null, beamSize);
   }
@@ -299,6 +305,13 @@ public class NameFinderME implements Tok
        TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources) throws IOException {
 
      // SequenceCodec seqCodec = new BiolouCodec();
+     String beamSizeString = trainParams.getSettings()
+         .get(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER);
+     
+     int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+     if (beamSizeString != null) {
+       beamSize = Integer.parseInt(beamSizeString);
+     }
      
      if (languageCode == null) {
        throw new IllegalArgumentException("languageCode must not be null!");
@@ -350,7 +363,7 @@ public class NameFinderME implements Tok
            resources, manifestInfoEntries);
      }
      else {
-       return new TokenNameFinderModel(languageCode, nameFinderModel,
+       return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, null,
            resources, manifestInfoEntries);
      }
    }
@@ -383,9 +396,7 @@ public class NameFinderME implements Tok
     TokenNameFinderModel model = train(languageCode, type, samples, trainParams,
         createFeatureGenerator(featureGeneratorBytes, resources), resources);
 
-    // place the descriptor in the model
     if (featureGeneratorBytes != null) {
-      // TODO: This will not work!!! Method is broken.
       model = model.updateFeatureGenerator(featureGeneratorBytes);
     }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1574453&r1=1574452&r2=1574453&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Wed Mar  5 12:26:43 2014
@@ -28,7 +28,9 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
 import opennlp.tools.util.InvalidFormatException;
@@ -73,29 +75,41 @@ public class TokenNameFinderModel extend
  
   private static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
  
+  public static final String BEAMSEARCH_BEAM_SIZE_PARAMETER = "BeamSize";
+  
   public TokenNameFinderModel(String languageCode, SequenceClassificationModel nameFinderModel,
       byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries);
     
     // TODO: Add validation for sequence models!
-    // if (!isModelValid(nameFinderModel)) {
+    //if (!isModelValid(nameFinderModel)) {
     //  throw new IllegalArgumentException("Model not compatible with name finder!");
-    // }
+    //}
     
     init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
   }
-  
-  public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
+
+  public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, int beamSize,
       byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
-    
     super(COMPONENT_NAME, languageCode, manifestInfoEntries);
     
     if (!isModelValid(nameFinderModel)) {
       throw new IllegalArgumentException("Model not compatible with name finder!");
     }
-
+    
+    
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    manifest.put(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+    
     init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
   }
+  
+  // TODO: Extend this one with beam size!
+  public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
+      byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
+    this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE, 
+        generatorDescriptor, resources, manifestInfoEntries);
+  }
 
   public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel,
       Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
@@ -152,7 +166,20 @@ public class TokenNameFinderModel extend
   }
 
   public SequenceClassificationModel<String> getNameFinderSequenceModel() {
-    if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+    
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    
+    if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+      String beamSizeString = manifest.getProperty(BEAMSEARCH_BEAM_SIZE_PARAMETER);
+      
+      int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+      if (beamSizeString != null) {
+        beamSize = Integer.parseInt(beamSizeString);
+      }
+      
+      return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME));
+    }
+    else if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
       return (SequenceClassificationModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME);
     }
     else {