You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@opennlp.apache.org by Jörn Kottmann <ko...@gmail.com> on 2011/06/06 10:27:02 UTC

Re: svn commit: r1130898 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/namefind/TokenNameFinderTrainerTool.java namefind/NameFinderME.java

Hi,

I might be mistaken, but the train method you added also needs
to place the descriptor in the model, very similar to what the train
method which takes the descriptor, cutoff and iterations does.

Jörn

On 6/3/11 7:34 AM, colen@apache.org wrote:
> Author: colen
> Date: Fri Jun  3 05:34:34 2011
> New Revision: 1130898
>
> URL: http://svn.apache.org/viewvc?rev=1130898&view=rev
> Log:
> OPENNLP-195 Added train method that takes params argument and the generatorDescriptor and resourceMap
>
> Modified:
>      incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>      incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>
> Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
> URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff
> ==============================================================================
> --- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
> +++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Fri Jun  3 05:34:34 2011
> @@ -22,11 +22,9 @@ import java.io.FileInputStream;
>   import java.io.IOException;
>   import java.io.InputStream;
>   import java.nio.charset.Charset;
> -import java.util.Collections;
>   import java.util.HashMap;
>   import java.util.Map;
>
> -import opennlp.model.TrainUtil;
>   import opennlp.tools.cmdline.CLI;
>   import opennlp.tools.cmdline.CmdLineTool;
>   import opennlp.tools.cmdline.CmdLineUtil;
> @@ -187,8 +185,9 @@ public final class TokenNameFinderTraine
>              parameters.getCutoff());
>         }
>         else {
> -        model = opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(), parameters.getType(), sampleStream, mlParams, null,
> -            Collections.<String, Object>emptyMap());
> +        model = opennlp.tools.namefind.NameFinderME.train(
> +            parameters.getLanguage(), parameters.getType(), sampleStream,
> +            mlParams, featureGeneratorBytes, resources);
>         }
>       }
>       catch (IOException e) {
>
> Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
> URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff
> ==============================================================================
> --- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
> +++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Fri Jun  3 05:34:34 2011
> @@ -19,10 +19,7 @@
>   package opennlp.tools.namefind;
>
>   import java.io.ByteArrayInputStream;
> -import java.io.FileInputStream;
> -import java.io.FileOutputStream;
>   import java.io.IOException;
> -import java.io.InputStreamReader;
>   import java.io.ObjectStreamException;
>   import java.util.ArrayList;
>   import java.util.Collections;
> @@ -40,11 +37,8 @@ import opennlp.model.EventStream;
>   import opennlp.model.MaxentModel;
>   import opennlp.model.TrainUtil;
>   import opennlp.model.TwoPassDataIndexer;
> -import opennlp.tools.postag.POSSampleSequenceStream;
>   import opennlp.tools.util.BeamSearch;
> -import opennlp.tools.util.HashSumEventStream;
>   import opennlp.tools.util.ObjectStream;
> -import opennlp.tools.util.PlainTextByLineStream;
>   import opennlp.tools.util.Sequence;
>   import opennlp.tools.util.SequenceValidator;
>   import opennlp.tools.util.Span;
> @@ -61,8 +55,6 @@ import opennlp.tools.util.featuregen.Sen
>   import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
>   import opennlp.tools.util.featuregen.TokenFeatureGenerator;
>   import opennlp.tools.util.featuregen.WindowFeatureGenerator;
> -import opennlp.tools.util.model.BaseModel;
> -import opennlp.tools.util.model.ModelUtil;
>
>   /**
>    * Class for creating a maximum-entropy-based name finder.
> @@ -210,6 +202,26 @@ public class NameFinderME implements Tok
>              });
>     }
>
> +  private static AdaptiveFeatureGenerator createFeatureGenerator(
> +      byte[] generatorDescriptor, final Map<String, Object>  resources)
> +      throws IOException {
> +    AdaptiveFeatureGenerator featureGenerator;
> +
> +    if (generatorDescriptor != null) {
> +      featureGenerator = GeneratorFactory.create(new ByteArrayInputStream(
> +          generatorDescriptor), new FeatureGeneratorResourceProvider() {
> +
> +        public Object getResource(String key) {
> +          return resources.get(key);
> +        }
> +      });
> +    } else {
> +      featureGenerator = null;
> +    }
> +
> +    return featureGenerator;
> +  }
> +
>     public Span[] find(String[] tokens) {
>       return find(tokens, EMPTY);
>     }
> @@ -328,6 +340,26 @@ public class NameFinderME implements Tok
>        return sprobs;
>      }
>
> +   /**
> +    * Trains a name finder model.
> +    *
> +    * @param languageCode
> +    *          the language of the training data
> +    * @param type
> +    *          null or an override type for all types in the training data
> +    * @param samples
> +    *          the training data
> +    * @param trainParams
> +    *          machine learning train parameters
> +    * @param generator
> +    *          null or the feature generator
> +    * @param resources
> +    *          the resources for the name finder or null if none
> +    *
> +    * @return the newly trained model
> +    *
> +    * @throws IOException
> +    */
>      public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample>  samples,
>          TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object>  resources) throws IOException {
>
> @@ -358,6 +390,34 @@ public class NameFinderME implements Tok
>            resources, manifestInfoEntries);
>      }
>
> +  /**
> +   * Trains a name finder model.
> +   *
> +   * @param languageCode
> +   *          the language of the training data
> +   * @param type
> +   *          null or an override type for all types in the training data
> +   * @param samples
> +   *          the training data
> +   * @param trainParams
> +   *          machine learning train parameters
> +   * @param featureGeneratorBytes
> +   *          descriptor to configure the feature generation or null
> +   * @param resources
> +   *          the resources for the name finder or null if none
> +   *
> +   * @return the newly trained model
> +   *
> +   * @throws IOException
> +   */
> +  public static TokenNameFinderModel train(String languageCode, String type,
> +      ObjectStream<NameSample>  samples, TrainingParameters trainParams,
> +      byte[] featureGeneratorBytes, final Map<String, Object>  resources)
> +      throws IOException {
> +    return train(languageCode, type, samples, trainParams,
> +        createFeatureGenerator(featureGeneratorBytes, resources), resources);
> +  }
> +
>      /**
>       * Trains a name finder model.
>       *
> @@ -403,19 +463,7 @@ public class NameFinderME implements Tok
>
>        // TODO: Pass in resource manager ...
>
> -     AdaptiveFeatureGenerator featureGenerator;
> -
> -     if (generatorDescriptor != null) {
> -       featureGenerator = GeneratorFactory.create(new ByteArrayInputStream(generatorDescriptor), new FeatureGeneratorResourceProvider() {
> -
> -        public Object getResource(String key) {
> -          return resources.get(key);
> -        }
> -      });
> -     }
> -     else {
> -       featureGenerator = null;
> -     }
> +     AdaptiveFeatureGenerator featureGenerator = createFeatureGenerator(generatorDescriptor, resources);
>
>        TokenNameFinderModel model = train(languageCode, type, samples, featureGenerator,
>            resources, iterations, cutoff);
> @@ -427,7 +475,6 @@ public class NameFinderME implements Tok
>        return model;
>      }
>
> -
>     @Deprecated
>     public static GISModel train(EventStream es, int iterations, int cut) throws IOException {
>       return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
>
>


Re: svn commit: r1130898 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/namefind/TokenNameFinderTrainerTool.java namefind/NameFinderME.java

Posted by "william.colen@gmail.com" <wi...@gmail.com>.
I fixed it. Thank you for reviewing.


On Mon, Jun 6, 2011 at 5:27 AM, Jörn Kottmann <ko...@gmail.com> wrote:

> Hi,
>
> I might be mistaken, but the train method you added also needs
> to place the descriptor in the model. Very similar to the train method
> which takes the descriptor, cutoff and iterations.
>
> Jörn
>
>
> On 6/3/11 7:34 AM, colen@apache.org wrote:
>
>> Author: colen
>> Date: Fri Jun  3 05:34:34 2011
>> New Revision: 1130898
>>
>> URL: http://svn.apache.org/viewvc?rev=1130898&view=rev
>> Log:
>> OPENNLP-195 Added train method that takes params argument and the
>> generatorDescriptor and resourceMap
>>
>> Modified:
>>
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>>
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>>
>> Modified:
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> URL:
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff
>>
>> ==============================================================================
>> ---
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> (original)
>> +++
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> Fri Jun  3 05:34:34 2011
>> @@ -22,11 +22,9 @@ import java.io.FileInputStream;
>>  import java.io.IOException;
>>  import java.io.InputStream;
>>  import java.nio.charset.Charset;
>> -import java.util.Collections;
>>  import java.util.HashMap;
>>  import java.util.Map;
>>
>> -import opennlp.model.TrainUtil;
>>  import opennlp.tools.cmdline.CLI;
>>  import opennlp.tools.cmdline.CmdLineTool;
>>  import opennlp.tools.cmdline.CmdLineUtil;
>> @@ -187,8 +185,9 @@ public final class TokenNameFinderTraine
>>             parameters.getCutoff());
>>        }
>>        else {
>> -        model =
>> opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(),
>> parameters.getType(), sampleStream, mlParams, null,
>> -            Collections.<String, Object>emptyMap());
>> +        model = opennlp.tools.namefind.NameFinderME.train(
>> +            parameters.getLanguage(), parameters.getType(), sampleStream,
>> +            mlParams, featureGeneratorBytes, resources);
>>        }
>>      }
>>      catch (IOException e) {
>>
>> Modified:
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> URL:
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff
>>
>> ==============================================================================
>> ---
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> (original)
>> +++
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> Fri Jun  3 05:34:34 2011
>> @@ -19,10 +19,7 @@
>>  package opennlp.tools.namefind;
>>
>>  import java.io.ByteArrayInputStream;
>> -import java.io.FileInputStream;
>> -import java.io.FileOutputStream;
>>  import java.io.IOException;
>> -import java.io.InputStreamReader;
>>  import java.io.ObjectStreamException;
>>  import java.util.ArrayList;
>>  import java.util.Collections;
>> @@ -40,11 +37,8 @@ import opennlp.model.EventStream;
>>  import opennlp.model.MaxentModel;
>>  import opennlp.model.TrainUtil;
>>  import opennlp.model.TwoPassDataIndexer;
>> -import opennlp.tools.postag.POSSampleSequenceStream;
>>  import opennlp.tools.util.BeamSearch;
>> -import opennlp.tools.util.HashSumEventStream;
>>  import opennlp.tools.util.ObjectStream;
>> -import opennlp.tools.util.PlainTextByLineStream;
>>  import opennlp.tools.util.Sequence;
>>  import opennlp.tools.util.SequenceValidator;
>>  import opennlp.tools.util.Span;
>> @@ -61,8 +55,6 @@ import opennlp.tools.util.featuregen.Sen
>>  import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
>>  import opennlp.tools.util.featuregen.TokenFeatureGenerator;
>>  import opennlp.tools.util.featuregen.WindowFeatureGenerator;
>> -import opennlp.tools.util.model.BaseModel;
>> -import opennlp.tools.util.model.ModelUtil;
>>
>>  /**
>>   * Class for creating a maximum-entropy-based name finder.
>> @@ -210,6 +202,26 @@ public class NameFinderME implements Tok
>>             });
>>    }
>>
>> +  private static AdaptiveFeatureGenerator createFeatureGenerator(
>> +      byte[] generatorDescriptor, final Map<String, Object>  resources)
>> +      throws IOException {
>> +    AdaptiveFeatureGenerator featureGenerator;
>> +
>> +    if (generatorDescriptor != null) {
>> +      featureGenerator = GeneratorFactory.create(new
>> ByteArrayInputStream(
>> +          generatorDescriptor), new FeatureGeneratorResourceProvider() {
>> +
>> +        public Object getResource(String key) {
>> +          return resources.get(key);
>> +        }
>> +      });
>> +    } else {
>> +      featureGenerator = null;
>> +    }
>> +
>> +    return featureGenerator;
>> +  }
>> +
>>    public Span[] find(String[] tokens) {
>>      return find(tokens, EMPTY);
>>    }
>> @@ -328,6 +340,26 @@ public class NameFinderME implements Tok
>>       return sprobs;
>>     }
>>
>> +   /**
>> +    * Trains a name finder model.
>> +    *
>> +    * @param languageCode
>> +    *          the language of the training data
>> +    * @param type
>> +    *          null or an override type for all types in the training
>> data
>> +    * @param samples
>> +    *          the training data
>> +    * @param trainParams
>> +    *          machine learning train parameters
>> +    * @param generator
>> +    *          null or the feature generator
>> +    * @param resources
>> +    *          the resources for the name finder or null if none
>> +    *
>> +    * @return the newly trained model
>> +    *
>> +    * @throws IOException
>> +    */
>>     public static TokenNameFinderModel train(String languageCode, String
>> type, ObjectStream<NameSample>  samples,
>>         TrainingParameters trainParams, AdaptiveFeatureGenerator
>> generator, final Map<String, Object>  resources) throws IOException {
>>
>> @@ -358,6 +390,34 @@ public class NameFinderME implements Tok
>>           resources, manifestInfoEntries);
>>     }
>>
>> +  /**
>> +   * Trains a name finder model.
>> +   *
>> +   * @param languageCode
>> +   *          the language of the training data
>> +   * @param type
>> +   *          null or an override type for all types in the training data
>> +   * @param samples
>> +   *          the training data
>> +   * @param trainParams
>> +   *          machine learning train parameters
>> +   * @param featureGeneratorBytes
>> +   *          descriptor to configure the feature generation or null
>> +   * @param resources
>> +   *          the resources for the name finder or null if none
>> +   *
>> +   * @return the newly trained model
>> +   *
>> +   * @throws IOException
>> +   */
>> +  public static TokenNameFinderModel train(String languageCode, String
>> type,
>> +      ObjectStream<NameSample>  samples, TrainingParameters trainParams,
>> +      byte[] featureGeneratorBytes, final Map<String, Object>  resources)
>> +      throws IOException {
>> +    return train(languageCode, type, samples, trainParams,
>> +        createFeatureGenerator(featureGeneratorBytes, resources),
>> resources);
>> +  }
>> +
>>     /**
>>      * Trains a name finder model.
>>      *
>> @@ -403,19 +463,7 @@ public class NameFinderME implements Tok
>>
>>       // TODO: Pass in resource manager ...
>>
>> -     AdaptiveFeatureGenerator featureGenerator;
>> -
>> -     if (generatorDescriptor != null) {
>> -       featureGenerator = GeneratorFactory.create(new
>> ByteArrayInputStream(generatorDescriptor), new
>> FeatureGeneratorResourceProvider() {
>> -
>> -        public Object getResource(String key) {
>> -          return resources.get(key);
>> -        }
>> -      });
>> -     }
>> -     else {
>> -       featureGenerator = null;
>> -     }
>> +     AdaptiveFeatureGenerator featureGenerator =
>> createFeatureGenerator(generatorDescriptor, resources);
>>
>>       TokenNameFinderModel model = train(languageCode, type, samples,
>> featureGenerator,
>>           resources, iterations, cutoff);
>> @@ -427,7 +475,6 @@ public class NameFinderME implements Tok
>>       return model;
>>     }
>>
>> -
>>    @Deprecated
>>    public static GISModel train(EventStream es, int iterations, int cut)
>> throws IOException {
>>      return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
>>
>>
>>
>