You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/24 01:13:44 UTC

svn commit: r1304678 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/tokenize/TokenizerME.java main/java/opennlp/tools/tokenize/TokenizerModel.java test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java

Author: colen
Date: Sat Mar 24 00:13:44 2012
New Revision: 1304678

URL: http://svn.apache.org/viewvc?rev=1304678&view=rev
Log:
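OPENNLP-482: TokenizerME should get its configuration from the TokenizerFactory. Removed the now-redundant languageCode argument from the TokenizerModel constructor and from TokenizerME.train; the language code is read from the factory instead.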

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java Sat Mar 24 00:13:44 2012
@@ -121,9 +121,20 @@ public class TokenizerME extends Abstrac
   private List<Span> newTokens;
 
   public TokenizerME(TokenizerModel model) {
-    this(model, new Factory());
+    TokenizerFactory factory = model.getFactory();
+    this.alphanumeric = factory.getAlphaNumericPattern();
+    this.cg = factory.getContextGenerator();
+    this.model = model.getMaxentModel();
+    this.useAlphaNumericOptimization = factory.isUseAlphaNumericOptmization();
+
+    newTokens = new ArrayList<Span>();
+    tokProbs = new ArrayList<Double>(50);
   }
-  
+
+  /**
+   * @deprecated use {@link TokenizerFactory} to extend the Tokenizer
+   *             functionality
+   */
   public TokenizerME(TokenizerModel model, Factory factory) {
     String languageCode = model.getLanguage();
 
@@ -214,8 +225,6 @@ public class TokenizerME extends Abstrac
   /**
    * Trains a model for the {@link TokenizerME}.
    * 
-   * @param languageCode
-   *          the language of the natural text
    * @param samples
    *          the samples used for the training.
    * @param factory
@@ -229,8 +238,7 @@ public class TokenizerME extends Abstrac
    *           during training. Or if reading from the {@link ObjectStream}
    *           fails.
    */
-  public static TokenizerModel train(String languageCode,
-      ObjectStream<TokenSample> samples, TokenizerFactory factory,
+  public static TokenizerModel train(ObjectStream<TokenSample> samples, TokenizerFactory factory,
       TrainingParameters mlParams) throws IOException {
 
     Map<String, String> manifestInfoEntries = new HashMap<String, String>();
@@ -242,7 +250,7 @@ public class TokenizerME extends Abstrac
     AbstractModel maxentModel = TrainUtil.train(eventStream,
         mlParams.getSettings(), manifestInfoEntries);
 
-    return new TokenizerModel(languageCode, maxentModel, manifestInfoEntries,
+    return new TokenizerModel(maxentModel, manifestInfoEntries,
         factory);
   }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java Sat Mar 24 00:13:44 2012
@@ -50,14 +50,13 @@ public final class TokenizerModel extend
   /**
    * Initializes the current instance.
    * 
-   * @param languageCode the language of the natural text
    * @param tokenizerModel the model
    * @param manifestInfoEntries the manifest
    * @param tokenizerFactory the factory
    */
-  public TokenizerModel(String languageCode, AbstractModel tokenizerModel,
+  public TokenizerModel(AbstractModel tokenizerModel,
       Map<String, String> manifestInfoEntries, TokenizerFactory tokenizerFactory) {
-    super(COMPONENT_NAME, languageCode, manifestInfoEntries, tokenizerFactory);
+    super(COMPONENT_NAME, tokenizerFactory.getLanguageCode(), manifestInfoEntries, tokenizerFactory);
     artifactMap.put(TOKENIZER_MODEL_ENTRY, tokenizerModel);
     checkArtifactMap();
   }
@@ -75,7 +74,7 @@ public final class TokenizerModel extend
   public TokenizerModel(String language, AbstractModel tokenizerMaxentModel,
       Dictionary abbreviations, boolean useAlphaNumericOptimization,
       Map<String, String> manifestInfoEntries) {
-    this(language, tokenizerMaxentModel, manifestInfoEntries, 
+    this(tokenizerMaxentModel, manifestInfoEntries, 
         new TokenizerFactory(language, abbreviations, useAlphaNumericOptimization, null));
   }
 

Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java Sat Mar 24 00:13:44 2012
@@ -55,7 +55,7 @@ public class TokenizerFactoryTest {
 
   private static TokenizerModel train(TokenizerFactory factory)
       throws IOException {
-    return TokenizerME.train(factory.getLanguageCode(), createSampleStream(),
+    return TokenizerME.train(createSampleStream(),
         factory, TrainingParameters.defaultParams());
   }