You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/05 16:23:06 UTC

svn commit: r1574524 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ ml/ namefind/ postag/

Author: joern
Date: Wed Mar  5 15:23:06 2014
New Revision: 1574524

URL: http://svn.apache.org/r1574524
Log:
OPENNLP-641 Moved beam search parameter to the training parameters file

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java Wed Mar  5 15:23:06 2014
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.EventTrainer;
 import opennlp.tools.ml.SequenceTrainer;
 import opennlp.tools.ml.TrainerFactory;
@@ -30,6 +31,7 @@ import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
 import opennlp.tools.ml.model.TrainUtil;
+import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.postag.POSSampleSequenceStream;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
@@ -74,12 +76,12 @@ public class ChunkerME implements Chunke
     this.sequenceValidator = sequenceValidator;
     this.contextGenerator = contextGenerator;
     
-    if (model.getChunkerModel() != null) {
-      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
-          model.getChunkerModel(), 0);
+    if (model.getChunkerSequenceModel() != null) {
+      this.model = model.getChunkerSequenceModel();
     }
     else {
-      this.model = model.getChunkerSequenceModel();
+      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+          model.getChunkerModel(), 0);
     }
   }
   
@@ -192,7 +194,14 @@ public class ChunkerME implements Chunke
   
   public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
       TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
-
+    
+    String beamSizeString = mlParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
+    
+    int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+    if (beamSizeString != null) {
+      beamSize = Integer.parseInt(beamSizeString);
+    }
+    
     Map<String, String> manifestInfoEntries = new HashMap<String, String>();
 
     TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings());

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Wed Mar  5 15:23:06 2014
@@ -26,12 +26,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.util.Map;
+import java.util.Properties;
 
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.model.AbstractModel;
 import opennlp.tools.ml.model.BinaryFileDataReader;
 import opennlp.tools.ml.model.GenericModelReader;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.BaseModel;
@@ -53,7 +56,7 @@ public class ChunkerModel extends BaseMo
    *             instead.
    */
   public ChunkerModel(String languageCode, MaxentModel chunkerModel, Map<String, String> manifestInfoEntries) {
-    this(languageCode, chunkerModel, manifestInfoEntries, new ChunkerFactory());
+    this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, new ChunkerFactory());
   }
   
   public ChunkerModel(String languageCode, SequenceClassificationModel<String> chunkerModel,
@@ -63,11 +66,19 @@ public class ChunkerModel extends BaseMo
     checkArtifactMap();
   }
 
-  
   public ChunkerModel(String languageCode, MaxentModel chunkerModel,
       Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
+    this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory);
+  }
+  
+  public ChunkerModel(String languageCode, MaxentModel chunkerModel, int beamSize,
+      Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
     artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel);
+    
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+    
     checkArtifactMap();
   }
   
@@ -105,6 +116,10 @@ public class ChunkerModel extends BaseMo
     }
   }
 
+  /**
+   * @deprecated use getChunkerSequenceModel instead. This method will be removed soon.
+   */
+  @Deprecated
   public MaxentModel getChunkerModel() {
     if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof MaxentModel) {
       return (MaxentModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
@@ -115,7 +130,20 @@ public class ChunkerModel extends BaseMo
   }
   
   public SequenceClassificationModel<String> getChunkerSequenceModel() {
-    if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+    
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    
+    if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+      String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
+      
+      int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+      if (beamSizeString != null) {
+        beamSize = Integer.parseInt(beamSizeString);
+      }
+      
+      return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME));
+    }
+    else if (artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
       return (SequenceClassificationModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
     }
     else {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java Wed Mar  5 15:23:06 2014
@@ -39,6 +39,8 @@ import opennlp.tools.util.SequenceValida
  */
 public class BeamSearch<T> implements SequenceClassificationModel<T> {
 
+  public static final String BEAM_SIZE_PARAMETER = "BeamSize";
+      
   private static final Object[] EMPTY_ADDITIONAL_CONTEXT = new Object[0];
 
   protected int size;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Wed Mar  5 15:23:06 2014
@@ -29,6 +29,7 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.EventModelSequenceTrainer;
 import opennlp.tools.ml.EventTrainer;
 import opennlp.tools.ml.SequenceTrainer;
@@ -305,8 +306,7 @@ public class NameFinderME implements Tok
        TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources) throws IOException {
 
      // SequenceCodec seqCodec = new BiolouCodec();
-     String beamSizeString = trainParams.getSettings()
-         .get(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER);
+     String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
      
      int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
      if (beamSizeString != null) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Wed Mar  5 15:23:06 2014
@@ -75,8 +75,6 @@ public class TokenNameFinderModel extend
  
   private static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
  
-  public static final String BEAMSEARCH_BEAM_SIZE_PARAMETER = "BeamSize";
-  
   public TokenNameFinderModel(String languageCode, SequenceClassificationModel nameFinderModel,
       byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries);
@@ -97,9 +95,8 @@ public class TokenNameFinderModel extend
       throw new IllegalArgumentException("Model not compatible with name finder!");
     }
     
-    
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
-    manifest.put(TokenNameFinderModel.BEAMSEARCH_BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
+    manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
     
     init(nameFinderModel, generatorDescriptor, resources, manifestInfoEntries);
   }
@@ -151,10 +148,9 @@ public class TokenNameFinderModel extend
   }
   
   /**
-   * Retrieves the {@link TokenNameFinder} model.
-   *
-   * @return the classification model
+   * @deprecated use getNameFinderSequenceModel instead. This method will be removed soon.
    */
+  @Deprecated
   public MaxentModel getNameFinderModel() {
     
     if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
@@ -170,7 +166,7 @@ public class TokenNameFinderModel extend
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
     
     if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) {
-      String beamSizeString = manifest.getProperty(BEAMSEARCH_BEAM_SIZE_PARAMETER);
+      String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
       
       int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
       if (beamSizeString != null) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Wed Mar  5 15:23:06 2014
@@ -22,11 +22,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.util.Map;
+import java.util.Properties;
 
 import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.model.AbstractModel;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.ArtifactSerializer;
@@ -63,7 +66,7 @@ public final class POSModel extends Base
    */
   public POSModel(String languageCode, MaxentModel posModel,
       POSDictionary tagDictionary, Dictionary ngramDict) {
-    this(languageCode, posModel, null, new POSTaggerFactory(ngramDict,
+    this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, null, new POSTaggerFactory(ngramDict,
         tagDictionary));
   }
 
@@ -76,11 +79,17 @@ public final class POSModel extends Base
         throw new IllegalArgumentException("The maxentPosModel param must not be null!");
 
     artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
-    checkArtifactMap();
+    // TODO: This fails probably for the sequence model ... ?! 
+    // checkArtifactMap();
   }
-  
+
   public POSModel(String languageCode, MaxentModel posModel,
       Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
+    this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, posFactory);
+  }
+  
+  public POSModel(String languageCode, MaxentModel posModel, int beamSize,
+      Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
 
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
 
@@ -125,7 +134,11 @@ public final class POSModel extends Base
     }
   }
 
-  // TODO: This should be deprecated for the release ...
+  /**
+   * @deprecated use getPosSequenceModel instead. This method will be removed soon.
+   */
+  @Deprecated
+
   public MaxentModel getPosModel() {
     if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) {
       return (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME);
@@ -136,7 +149,20 @@ public final class POSModel extends Base
   }
 
   public SequenceClassificationModel<String> getPosSequenceModel() {
-    if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
+    
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    
+    if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+      String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
+      
+      int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+      if (beamSizeString != null) {
+        beamSize = Integer.parseInt(beamSizeString);
+      }
+      
+      return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME));
+    }
+    else if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) {
       return (SequenceClassificationModel) artifactMap.get(POS_MODEL_ENTRY_NAME);
     }
     else {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java?rev=1574524&r1=1574523&r2=1574524&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java Wed Mar  5 15:23:06 2014
@@ -28,6 +28,7 @@ import java.util.StringTokenizer;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.EventModelSequenceTrainer;
 import opennlp.tools.ml.EventTrainer;
 import opennlp.tools.ml.SequenceTrainer;
@@ -36,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.T
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.ngram.NGramModel;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
@@ -106,12 +108,12 @@ public class POSTaggerME implements POST
     
     sequenceValidator = factory.getSequenceValidator();
     
-    if (model.getPosModel() != null) {
-      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
-          model.getPosModel(), cacheSize);
+    if (model.getPosSequenceModel() != null) {
+      this.model = model.getPosSequenceModel();
     }
     else {
-      this.model = model.getPosSequenceModel();
+      this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize,
+          model.getPosModel(), cacheSize);
     }
   }
   
@@ -246,6 +248,13 @@ public class POSTaggerME implements POST
       ObjectStream<POSSample> samples, TrainingParameters trainParams,
       POSTaggerFactory posFactory) throws IOException {
     
+    String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
+    
+    int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;
+    if (beamSizeString != null) {
+      beamSize = Integer.parseInt(beamSizeString);
+    }
+    
     POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator();
     
     Map<String, String> manifestInfoEntries = new HashMap<String, String>();