You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/24 01:13:44 UTC
svn commit: r1304678 - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/tokenize/TokenizerME.java
main/java/opennlp/tools/tokenize/TokenizerModel.java
test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
Author: colen
Date: Sat Mar 24 00:13:44 2012
New Revision: 1304678
URL: http://svn.apache.org/viewvc?rev=1304678&view=rev
Log:
OPENNLP-482: TokenizerME should get configurations from TokenizerFactory. Removed unnecessary argument from TokenizerModel constructor.
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java Sat Mar 24 00:13:44 2012
@@ -121,9 +121,20 @@ public class TokenizerME extends Abstrac
private List<Span> newTokens;
public TokenizerME(TokenizerModel model) {
- this(model, new Factory());
+ TokenizerFactory factory = model.getFactory();
+ this.alphanumeric = factory.getAlphaNumericPattern();
+ this.cg = factory.getContextGenerator();
+ this.model = model.getMaxentModel();
+ this.useAlphaNumericOptimization = factory.isUseAlphaNumericOptmization();
+
+ newTokens = new ArrayList<Span>();
+ tokProbs = new ArrayList<Double>(50);
}
-
+
+ /**
+ * @deprecated Use {@link TokenizerFactory} to extend the tokenizer's
+ * functionality instead.
+ */
public TokenizerME(TokenizerModel model, Factory factory) {
String languageCode = model.getLanguage();
@@ -214,8 +225,6 @@ public class TokenizerME extends Abstrac
/**
* Trains a model for the {@link TokenizerME}.
*
- * @param languageCode
- * the language of the natural text
* @param samples
* the samples used for the training.
* @param factory
@@ -229,8 +238,7 @@ public class TokenizerME extends Abstrac
* during training. Or if reading from the {@link ObjectStream}
* fails.
*/
- public static TokenizerModel train(String languageCode,
- ObjectStream<TokenSample> samples, TokenizerFactory factory,
+ public static TokenizerModel train(ObjectStream<TokenSample> samples, TokenizerFactory factory,
TrainingParameters mlParams) throws IOException {
Map<String, String> manifestInfoEntries = new HashMap<String, String>();
@@ -242,7 +250,7 @@ public class TokenizerME extends Abstrac
AbstractModel maxentModel = TrainUtil.train(eventStream,
mlParams.getSettings(), manifestInfoEntries);
- return new TokenizerModel(languageCode, maxentModel, manifestInfoEntries,
+ return new TokenizerModel(maxentModel, manifestInfoEntries,
factory);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java Sat Mar 24 00:13:44 2012
@@ -50,14 +50,13 @@ public final class TokenizerModel extend
/**
* Initializes the current instance.
*
- * @param languageCode the language of the natural text
* @param tokenizerModel the model
* @param manifestInfoEntries the manifest
* @param tokenizerFactory the factory
*/
- public TokenizerModel(String languageCode, AbstractModel tokenizerModel,
+ public TokenizerModel(AbstractModel tokenizerModel,
Map<String, String> manifestInfoEntries, TokenizerFactory tokenizerFactory) {
- super(COMPONENT_NAME, languageCode, manifestInfoEntries, tokenizerFactory);
+ super(COMPONENT_NAME, tokenizerFactory.getLanguageCode(), manifestInfoEntries, tokenizerFactory);
artifactMap.put(TOKENIZER_MODEL_ENTRY, tokenizerModel);
checkArtifactMap();
}
@@ -75,7 +74,7 @@ public final class TokenizerModel extend
public TokenizerModel(String language, AbstractModel tokenizerMaxentModel,
Dictionary abbreviations, boolean useAlphaNumericOptimization,
Map<String, String> manifestInfoEntries) {
- this(language, tokenizerMaxentModel, manifestInfoEntries,
+ this(tokenizerMaxentModel, manifestInfoEntries,
new TokenizerFactory(language, abbreviations, useAlphaNumericOptimization, null));
}
Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java?rev=1304678&r1=1304677&r2=1304678&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java Sat Mar 24 00:13:44 2012
@@ -55,7 +55,7 @@ public class TokenizerFactoryTest {
private static TokenizerModel train(TokenizerFactory factory)
throws IOException {
- return TokenizerME.train(factory.getLanguageCode(), createSampleStream(),
+ return TokenizerME.train(createSampleStream(),
factory, TrainingParameters.defaultParams());
}