You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/19 10:49:31 UTC
svn commit: r1124608 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
chunker/ cmdline/ cmdline/chunker/ cmdline/doccat/ cmdline/namefind/
cmdline/parser/ cmdline/postag/ cmdline/sentdetect/ cmdline/tokenizer/
sentdetect/ tokenize/
Author: joern
Date: Thu May 19 08:49:30 2011
New Revision: 1124608
URL: http://svn.apache.org/viewvc?rev=1124608&view=rev
Log:
OPENNLP-175 Updated cross validators to also use TrainingParameters object
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java Thu May 19 08:49:30 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.CrossValidationPartitioner;
import opennlp.tools.util.eval.FMeasure;
@@ -29,13 +30,27 @@ public class ChunkerCrossValidator {
private final String languageCode;
private final int cutoff;
private final int iterations;
+
+ private final TrainingParameters params;
+
private FMeasure fmeasure = new FMeasure();
public ChunkerCrossValidator(String languageCode, int cutoff, int iterations) {
+
this.languageCode = languageCode;
this.cutoff = cutoff;
this.iterations = iterations;
+
+ params = null;
}
+
+ public ChunkerCrossValidator(String languageCode, TrainingParameters params) {
+ this.languageCode = languageCode;
+ this.params = params;
+
+ cutoff = -1;
+ iterations = -1;
+ }
public void evaluate(ObjectStream<ChunkSample> samples, int nFolds)
throws IOException, InvalidFormatException, IOException {
@@ -47,9 +62,17 @@ public class ChunkerCrossValidator {
CrossValidationPartitioner.TrainingSampleStream<ChunkSample> trainingSampleStream = partitioner
.next();
- ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
- cutoff, iterations);
-
+ ChunkerModel model;
+
+ if (params == null) {
+ model = ChunkerME.train(languageCode, trainingSampleStream,
+ cutoff, iterations);
+ }
+ else {
+ model = ChunkerME.train(languageCode, trainingSampleStream,
+ new DefaultChunkerContextGenerator(), params);
+ }
+
// do testing
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java Thu May 19 08:49:30 2011
@@ -336,7 +336,8 @@ public final class CmdLineUtil {
}
// it's optional, passing null is allowed
- public static TrainingParameters loadTrainingParameters(String paramFile) {
+ public static TrainingParameters loadTrainingParameters(String paramFile,
+ boolean supportSequenceTraining) {
TrainingParameters params = null;
@@ -360,6 +361,16 @@ public final class CmdLineUtil {
} catch (IOException e) {
}
}
+
+ if (!TrainUtil.isValid(params.getSettings())) {
+ System.err.println("Training parameters file is invalid!");
+ throw new TerminateToolException(-1);
+ }
+
+ if (!supportSequenceTraining && TrainUtil.isSequenceTraining(params.getSettings())) {
+ System.err.println("Sequence training is not supported!");
+ throw new TerminateToolException(-1);
+ }
}
return params;
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Thu May 19 08:49:30 2011
@@ -79,19 +79,7 @@ public class ChunkerTrainerTool implemen
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-
- if (mlParams != null) {
- if (!TrainUtil.isValid(mlParams.getSettings())) {
- System.err.println("Training parameters file is invalid!");
- throw new TerminateToolException(-1);
- }
-
- if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
- System.err.println("Sequence training is not supported!");
- throw new TerminateToolException(-1);
- }
- }
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Thu May 19 08:49:30 2011
@@ -77,19 +77,7 @@ public class DoccatTrainerTool implement
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-
- if (mlParams != null) {
- if (!TrainUtil.isValid(mlParams.getSettings())) {
- System.err.println("Training parameters file is invalid!");
- throw new TerminateToolException(-1);
- }
-
- if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
- System.err.println("Sequence training is not supported!");
- throw new TerminateToolException(-1);
- }
- }
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Thu May 19 08:49:30 2011
@@ -77,19 +77,7 @@ public final class TokenNameFinderTraine
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-
- if (mlParams != null) {
- if (!TrainUtil.isValid(mlParams.getSettings())) {
- System.err.println("Training parameters file is invalid!");
- throw new TerminateToolException(-1);
- }
-
- if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
- System.err.println("Sequence training is not supported!");
- throw new TerminateToolException(-1);
- }
- }
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java Thu May 19 08:49:30 2011
@@ -109,18 +109,15 @@ public final class ParserTrainerTool imp
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), true);
if (mlParams != null) {
+ // TODO: Validation is more complex ...
+
if (!TrainUtil.isValid(mlParams.getSettings())) {
System.err.println("Training parameters file is invalid!");
throw new TerminateToolException(-1);
}
-
- if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
- System.err.println("Sequence training is not supported!");
- throw new TerminateToolException(-1);
- }
}
ObjectStream<Parse> sampleStream = openTrainingData(new File(CmdLineUtil.getParameter("-data", args)), parameters.getEncoding());
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java Thu May 19 08:49:30 2011
@@ -76,7 +76,7 @@ public final class POSTaggerTrainerTool
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), true);
if (mlParams != null && !TrainUtil.isValid(mlParams.getSettings())) {
System.err.println("Training parameters file is invalid!");
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java Thu May 19 08:49:30 2011
@@ -58,13 +58,23 @@ public final class SentenceDetectorCross
throw new TerminateToolException(1);
}
+ opennlp.tools.util.TrainingParameters mlParams =
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
+
File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
ObjectStream<SentenceSample> sampleStream = SentenceDetectorTrainerTool.openSampleData("Training Data",
trainingDataInFile, parameters.getEncoding());
- SDCrossValidator validator = new SDCrossValidator(parameters.getLanguage(), parameters.getCutoff(), parameters.getNumberOfIterations());
+ SDCrossValidator validator;
+
+ if (mlParams == null) {
+ validator = new SDCrossValidator(parameters.getLanguage(), parameters.getCutoff(), parameters.getNumberOfIterations());
+ }
+ else {
+ validator = new SDCrossValidator(parameters.getLanguage(), mlParams);
+ }
try {
validator.evaluate(sampleStream, 10);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java Thu May 19 08:49:30 2011
@@ -76,7 +76,7 @@ public final class SentenceDetectorTrain
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
if (mlParams != null) {
if (!TrainUtil.isValid(mlParams.getSettings())) {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Thu May 19 08:49:30 2011
@@ -59,6 +59,9 @@ public final class TokenizerCrossValidat
throw new TerminateToolException(1);
}
+ opennlp.tools.util.TrainingParameters mlParams =
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
+
File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
@@ -66,9 +69,18 @@ public final class TokenizerCrossValidat
TokenizerTrainerTool.openSampleData("Training Data",
trainingDataInFile, parameters.getEncoding());
- TokenizerCrossValidator validator =
- new opennlp.tools.tokenize.TokenizerCrossValidator(
- parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled());
+
+ TokenizerCrossValidator validator;
+
+ if (mlParams == null) {
+ validator = new opennlp.tools.tokenize.TokenizerCrossValidator(
+ parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled(),
+ parameters.getCutoff(), parameters.getNumberOfIterations());
+ }
+ else {
+ validator = new opennlp.tools.tokenize.TokenizerCrossValidator(
+ parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled(), mlParams);
+ }
try {
validator.evaluate(sampleStream, 10);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java Thu May 19 08:49:30 2011
@@ -75,7 +75,7 @@ public final class TokenizerTrainerTool
}
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+ CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
if (mlParams != null) {
if (!TrainUtil.isValid(mlParams.getSettings())) {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java Thu May 19 08:49:30 2011
@@ -22,6 +22,7 @@ import java.io.IOException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.CrossValidationPartitioner;
import opennlp.tools.util.eval.FMeasure;
@@ -31,15 +32,28 @@ import opennlp.tools.util.eval.FMeasure;
public class SDCrossValidator {
private final String languageCode;
+
private final int cutoff;
private final int iterations;
+ private final TrainingParameters params;
+
private FMeasure fmeasure = new FMeasure();
public SDCrossValidator(String languageCode, int cutoff, int iterations) {
+
this.languageCode = languageCode;
this.cutoff = cutoff;
this.iterations = iterations;
+
+ params = null;
+ }
+
+ public SDCrossValidator(String languageCode, TrainingParameters params) {
+ this.languageCode = languageCode;
+ this.params = params;
+ cutoff = -1;
+ iterations = -1;
}
public SDCrossValidator(String languageCode) {
@@ -56,7 +70,14 @@ public class SDCrossValidator {
CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream =
partitioner.next();
- SentenceModel model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, cutoff, iterations);
+ SentenceModel model;
+
+ if (params == null) {
+ model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, cutoff, iterations);
+ }
+ else {
+ model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, params);
+ }
// do testing
SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java Thu May 19 08:49:30 2011
@@ -24,6 +24,7 @@ import java.io.ObjectStreamException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.CrossValidationPartitioner;
import opennlp.tools.util.eval.FMeasure;
@@ -32,6 +33,8 @@ public class TokenizerCrossValidator {
private final String language;
private final boolean alphaNumericOptimization;
+ private final TrainingParameters params;
+
private final int cutoff;
private final int iterations;
@@ -43,12 +46,24 @@ public class TokenizerCrossValidator {
this.alphaNumericOptimization = alphaNumericOptimization;
this.cutoff = cutoff;
this.iterations = iterations;
+
+ params = null;
}
public TokenizerCrossValidator(String language, boolean alphaNumericOptimization) {
this(language, alphaNumericOptimization, 5, 100);
}
+ public TokenizerCrossValidator(String language, boolean alphaNumericOptimization, TrainingParameters params) {
+ this.language = language;
+ this.alphaNumericOptimization = alphaNumericOptimization;
+ this.cutoff = -1;
+ this.iterations = -1;
+
+ this.params = params;
+ }
+
+
public void evaluate(ObjectStream<TokenSample> samples, int nFolds)
throws IOException {
@@ -61,8 +76,16 @@ public class TokenizerCrossValidator {
partitioner.next();
// Maybe throws IOException if temporary file handling fails ...
- TokenizerModel model = TokenizerME.train(language, trainingSampleStream,
- alphaNumericOptimization, cutoff, iterations);
+ TokenizerModel model;
+
+ if (params == null) {
+ model = TokenizerME.train(language, trainingSampleStream,
+ alphaNumericOptimization, cutoff, iterations);
+ }
+ else {
+ model = TokenizerME.train(language, trainingSampleStream,
+ alphaNumericOptimization, params);
+ }
TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model));
evaluator.evaluate(trainingSampleStream.getTestSampleStream());