You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/19 10:49:31 UTC

svn commit: r1124608 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ cmdline/ cmdline/chunker/ cmdline/doccat/ cmdline/namefind/ cmdline/parser/ cmdline/postag/ cmdline/sentdetect/ cmdline/tokenizer/ sentdetect/ tokenize/

Author: joern
Date: Thu May 19 08:49:30 2011
New Revision: 1124608

URL: http://svn.apache.org/viewvc?rev=1124608&view=rev
Log:
OPENNLP-175 Updated cross validators to also use TrainingParameters object

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java Thu May 19 08:49:30 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.FMeasure;
 
@@ -29,13 +30,27 @@ public class ChunkerCrossValidator {
 	private final String languageCode;
 	private final int cutoff;
 	private final int iterations;
+	
+	private final TrainingParameters params;
+	
 	private FMeasure fmeasure = new FMeasure();
 
 	public ChunkerCrossValidator(String languageCode, int cutoff, int iterations) {
+	    
 		this.languageCode = languageCode;
 		this.cutoff = cutoff;
 		this.iterations = iterations;
+		
+		params = null;
 	}
+	
+    public ChunkerCrossValidator(String languageCode, TrainingParameters params) {
+      this.languageCode = languageCode;
+      this.params = params;
+      
+      cutoff = -1;
+      iterations = -1;
+    }
 
 	public void evaluate(ObjectStream<ChunkSample> samples, int nFolds)
 			throws IOException, InvalidFormatException, IOException {
@@ -47,9 +62,17 @@ public class ChunkerCrossValidator {
 			CrossValidationPartitioner.TrainingSampleStream<ChunkSample> trainingSampleStream = partitioner
 					.next();
 
-			ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
-					cutoff, iterations);
-
+			ChunkerModel model;
+			
+			if (params == null) {
+              model = ChunkerME.train(languageCode, trainingSampleStream,
+    	      cutoff, iterations);
+			}
+			else {
+			  model = ChunkerME.train(languageCode, trainingSampleStream,
+			      new DefaultChunkerContextGenerator(), params);
+			}
+			
 			// do testing
 			ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
 

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java Thu May 19 08:49:30 2011
@@ -336,7 +336,8 @@ public final class CmdLineUtil {
   }
   
   // its optional, passing null is allowed
-  public static TrainingParameters loadTrainingParameters(String paramFile) {
+  public static TrainingParameters loadTrainingParameters(String paramFile,
+      boolean supportSequenceTraining) {
     
     TrainingParameters params = null;
     
@@ -360,6 +361,16 @@ public final class CmdLineUtil {
         } catch (IOException e) {
         }
       }
+      
+      if (!TrainUtil.isValid(params.getSettings())) {
+        System.err.println("Training parameters file is invalid!");
+        throw new TerminateToolException(-1);
+      }
+      
+      if (!supportSequenceTraining && TrainUtil.isSequenceTraining(params.getSettings())) {
+        System.err.println("Sequence training is not supported!");
+        throw new TerminateToolException(-1);
+      }
     }
     
     return params;

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Thu May 19 08:49:30 2011
@@ -79,19 +79,7 @@ public class ChunkerTrainerTool implemen
     }
     
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-    
-    if (mlParams != null) {
-      if (!TrainUtil.isValid(mlParams.getSettings())) {
-        System.err.println("Training parameters file is invalid!");
-        throw new TerminateToolException(-1);
-      }
-      
-      if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
-        System.err.println("Sequence training is not supported!");
-        throw new TerminateToolException(-1);
-      }
-    }
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
     
     File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
     File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Thu May 19 08:49:30 2011
@@ -77,19 +77,7 @@ public class DoccatTrainerTool implement
     }
     
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-    
-    if (mlParams != null) {
-      if (!TrainUtil.isValid(mlParams.getSettings())) {
-        System.err.println("Training parameters file is invalid!");
-        throw new TerminateToolException(-1);
-      }
-      
-      if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
-        System.err.println("Sequence training is not supported!");
-        throw new TerminateToolException(-1);
-      }
-    }
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
     
     File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
     File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Thu May 19 08:49:30 2011
@@ -77,19 +77,7 @@ public final class TokenNameFinderTraine
     }
     
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
-    
-    if (mlParams != null) {
-      if (!TrainUtil.isValid(mlParams.getSettings())) {
-        System.err.println("Training parameters file is invalid!");
-        throw new TerminateToolException(-1);
-      }
-      
-      if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
-        System.err.println("Sequence training is not supported!");
-        throw new TerminateToolException(-1);
-      }
-    }
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
     
     File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
     File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java Thu May 19 08:49:30 2011
@@ -109,18 +109,15 @@ public final class ParserTrainerTool imp
     } 
     
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), true);
     
     if (mlParams != null) {
+      // TODO: Validation is more complex ... 
+      
       if (!TrainUtil.isValid(mlParams.getSettings())) {
         System.err.println("Training parameters file is invalid!");
         throw new TerminateToolException(-1);
       }
-      
-      if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
-        System.err.println("Sequence training is not supported!");
-        throw new TerminateToolException(-1);
-      }
     }
     
     ObjectStream<Parse> sampleStream = openTrainingData(new File(CmdLineUtil.getParameter("-data", args)), parameters.getEncoding());

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java Thu May 19 08:49:30 2011
@@ -76,7 +76,7 @@ public final class POSTaggerTrainerTool 
     }    
     
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), true);
     
     if (mlParams != null && !TrainUtil.isValid(mlParams.getSettings())) {
       System.err.println("Training parameters file is invalid!");

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java Thu May 19 08:49:30 2011
@@ -58,13 +58,23 @@ public final class SentenceDetectorCross
       throw new TerminateToolException(1);
     }
     
+    opennlp.tools.util.TrainingParameters mlParams = 
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
+    
     File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
     CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
     
     ObjectStream<SentenceSample> sampleStream = SentenceDetectorTrainerTool.openSampleData("Training Data",
         trainingDataInFile, parameters.getEncoding());
     
-    SDCrossValidator validator = new SDCrossValidator(parameters.getLanguage(), parameters.getCutoff(), parameters.getNumberOfIterations());
+    SDCrossValidator validator;
+
+    if (mlParams == null) {
+      validator = new SDCrossValidator(parameters.getLanguage(), parameters.getCutoff(), parameters.getNumberOfIterations());
+    }
+    else {
+      validator = new SDCrossValidator(parameters.getLanguage(), mlParams);
+    }
     
     try {
       validator.evaluate(sampleStream, 10);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java Thu May 19 08:49:30 2011
@@ -76,7 +76,7 @@ public final class SentenceDetectorTrain
     }
 
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
     
     if (mlParams != null) {
       if (!TrainUtil.isValid(mlParams.getSettings())) {

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Thu May 19 08:49:30 2011
@@ -59,6 +59,9 @@ public final class TokenizerCrossValidat
       throw new TerminateToolException(1);
     }
     
+    opennlp.tools.util.TrainingParameters mlParams = 
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
+    
     File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
     CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
     
@@ -66,9 +69,18 @@ public final class TokenizerCrossValidat
         TokenizerTrainerTool.openSampleData("Training Data",
         trainingDataInFile, parameters.getEncoding());
     
-    TokenizerCrossValidator validator =
-        new opennlp.tools.tokenize.TokenizerCrossValidator(
-        parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled());
+    
+    TokenizerCrossValidator validator;
+
+    if (mlParams == null) {
+      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(
+          parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled(),
+          parameters.getCutoff(), parameters.getNumberOfIterations());
+    }
+    else {
+      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(
+          parameters.getLanguage(), parameters.isAlphaNumericOptimizationEnabled(), mlParams);
+    }
       
     try {
       validator.evaluate(sampleStream, 10);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java Thu May 19 08:49:30 2011
@@ -75,7 +75,7 @@ public final class TokenizerTrainerTool 
     }
 
     opennlp.tools.util.TrainingParameters mlParams = 
-      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args));
+      CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
     
     if (mlParams != null) {
       if (!TrainUtil.isValid(mlParams.getSettings())) {

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java Thu May 19 08:49:30 2011
@@ -22,6 +22,7 @@ import java.io.IOException;
 
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.FMeasure;
 
@@ -31,15 +32,28 @@ import opennlp.tools.util.eval.FMeasure;
 public class SDCrossValidator {
   
   private final String languageCode;
+  
   private final int cutoff;
   private final int iterations;
   
+  private final TrainingParameters params;
+  
   private FMeasure fmeasure = new FMeasure();
   
   public SDCrossValidator(String languageCode, int cutoff, int iterations) {
+    
     this.languageCode = languageCode;
     this.cutoff = cutoff;
     this.iterations = iterations;
+    
+    params = null;
+  }
+  
+  public SDCrossValidator(String languageCode, TrainingParameters params) {
+    this.languageCode = languageCode;
+    this.params = params;
+    cutoff = -1;
+    iterations = -1;
   }
   
   public SDCrossValidator(String languageCode) {
@@ -56,7 +70,14 @@ public class SDCrossValidator {
      CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream =
          partitioner.next();
      
-      SentenceModel model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, cutoff, iterations);
+      SentenceModel model; 
+      
+      if (params == null) {
+        model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, cutoff, iterations);
+      }
+      else {
+        model = SentenceDetectorME.train(languageCode, trainingSampleStream, true, null, params);
+      }
       
       // do testing
       SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java?rev=1124608&r1=1124607&r2=1124608&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java Thu May 19 08:49:30 2011
@@ -24,6 +24,7 @@ import java.io.ObjectStreamException;
 
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.FMeasure;
 
@@ -32,6 +33,8 @@ public class TokenizerCrossValidator {
   private final String language;
   private final boolean alphaNumericOptimization;
   
+  private final TrainingParameters params;
+  
   private final int cutoff;
   private final int iterations;
   
@@ -43,12 +46,24 @@ public class TokenizerCrossValidator {
     this.alphaNumericOptimization = alphaNumericOptimization;
     this.cutoff = cutoff;
     this.iterations = iterations;
+    
+    params = null;
   }
   
   public TokenizerCrossValidator(String language, boolean alphaNumericOptimization) {
     this(language, alphaNumericOptimization, 5, 100);
   }  
   
+  public TokenizerCrossValidator(String language, boolean alphaNumericOptimization, TrainingParameters params) {
+    this.language = language;
+    this.alphaNumericOptimization = alphaNumericOptimization;
+    this.cutoff = -1;
+    this.iterations = -1;
+    
+    this.params = params;
+  }
+  
+  
   public void evaluate(ObjectStream<TokenSample> samples, int nFolds) 
       throws IOException {
     
@@ -61,8 +76,16 @@ public class TokenizerCrossValidator {
          partitioner.next();
        
        // Maybe throws IOException if temporary file handling fails ...
-       TokenizerModel model = TokenizerME.train(language, trainingSampleStream, 
-           alphaNumericOptimization, cutoff, iterations);
+       TokenizerModel model;
+       
+       if (params == null) {
+         model = TokenizerME.train(language, trainingSampleStream, 
+             alphaNumericOptimization, cutoff, iterations);
+       }
+       else {
+         model = TokenizerME.train(language, trainingSampleStream, 
+             alphaNumericOptimization, params);
+       }
        
        TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model));
        evaluator.evaluate(trainingSampleStream.getTestSampleStream());