You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/18 23:29:39 UTC
svn commit: r1148061 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer: TokenizerCrossValidatorTool.java TokenizerTrainerTool.java TrainingParameters.java TrainingParametersI.java TrainingParams.java
Author: colen
Date: Mon Jul 18 21:29:38 2011
New Revision: 1148061
URL: http://svn.apache.org/viewvc?rev=1148061&view=rev
Log:
OPENNLP-227 Updated the Tokenizer trainer and cross-validation (CV) tools to parse their arguments through ArgumentParser parameter interfaces
Added:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java (contents, props changed)
- copied, changed from r1147973, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParametersI.java
Removed:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParameters.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParametersI.java
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1148061&r1=1148060&r2=1148061&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Mon Jul 18 21:29:38 2011
@@ -34,7 +34,7 @@ import opennlp.tools.util.eval.FMeasure;
public final class TokenizerCrossValidatorTool implements CmdLineTool {
- interface Parameters extends CVParams, TrainingParametersI {
+ interface CVToolParams extends CVParams, TrainingParams {
}
@@ -48,16 +48,16 @@ public final class TokenizerCrossValidat
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(Parameters.class);
+ + ArgumentParser.createUsage(CVToolParams.class);
}
public void run(String[] args) {
- if (!ArgumentParser.validateArguments(args, Parameters.class)) {
+ if (!ArgumentParser.validateArguments(args, CVToolParams.class)) {
System.err.println(getHelp());
throw new TerminateToolException(1);
}
- Parameters params = ArgumentParser.parse(args, Parameters.class);
+ CVToolParams params = ArgumentParser.parse(args, CVToolParams.class);
opennlp.tools.util.TrainingParameters mlParams = CmdLineUtil
.loadTrainingParameters(params.getParams(), false);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java?rev=1148061&r1=1148060&r2=1148061&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java Mon Jul 18 21:29:38 2011
@@ -23,10 +23,12 @@ import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.model.TrainUtil;
+import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.TrainingToolParams;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerModel;
@@ -34,6 +36,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
public final class TokenizerTrainerTool implements CmdLineTool {
+
+ interface TrainerToolParams extends TrainingParams, TrainingToolParams{
+
+ }
public String getName() {
return "TokenizerTrainer";
@@ -44,9 +50,8 @@ public final class TokenizerTrainerTool
}
public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName()
- + TrainingParameters.getParameterUsage() + " -data trainingData -model model\n" +
- TrainingParameters.getDescription();
+ return "Usage: " + CLI.CMD + " " + getName() + " "
+ + ArgumentParser.createUsage(TrainerToolParams.class);
}
static ObjectStream<TokenSample> openSampleData(String sampleDataName,
@@ -62,20 +67,16 @@ public final class TokenizerTrainerTool
}
public void run(String[] args) {
- if (args.length < 6) {
- System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- TrainingParameters parameters = new TrainingParameters(args);
-
- if (!parameters.isValid()) {
- System.out.println(getHelp());
+ if (!ArgumentParser.validateArguments(args, TrainerToolParams.class)) {
+ System.err.println(getHelp());
throw new TerminateToolException(1);
}
+
+ TrainerToolParams params = ArgumentParser.parse(args,
+ TrainerToolParams.class);
opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), false);
+ CmdLineUtil.loadTrainingParameters(params.getParams(), false);
if (mlParams != null) {
if (!TrainUtil.isValid(mlParams.getSettings())) {
@@ -89,25 +90,25 @@ public final class TokenizerTrainerTool
}
}
- File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
- File modelOutFile = new File(CmdLineUtil.getParameter("-model", args));
+ File trainingDataInFile = params.getData();
+ File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);
ObjectStream<TokenSample> sampleStream = openSampleData("Training",
- trainingDataInFile, parameters.getEncoding());
+ trainingDataInFile, params.getEncoding());
TokenizerModel model;
try {
if (mlParams == null) {
model = opennlp.tools.tokenize.TokenizerME.train(
- parameters.getLanguage(), sampleStream,
- parameters.isAlphaNumericOptimizationEnabled(),
- parameters.getCutoff(), parameters.getNumberOfIterations());
+ params.getLang(), sampleStream,
+ params.getAlphaNumOpt(),
+ params.getCutoff(), params.getIterations());
}
else {
model = opennlp.tools.tokenize.TokenizerME.train(
- parameters.getLanguage(), sampleStream,
- parameters.isAlphaNumericOptimizationEnabled(),
+ params.getLang(), sampleStream,
+ params.getAlphaNumOpt(),
mlParams);
}
} catch (IOException e) {
Copied: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java (from r1147973, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParametersI.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java?p2=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java&p1=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParametersI.java&r1=1147973&r2=1148061&rev=1148061&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParametersI.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java Mon Jul 18 21:29:38 2011
@@ -26,7 +26,7 @@ import opennlp.tools.cmdline.BasicTraini
*
* Note: Do not use this class, internal use only!
*/
-interface TrainingParametersI extends BasicTrainingParams {
+interface TrainingParams extends BasicTrainingParams {
@ParameterDescription(valueName = "isAlphaNumOpt", description = "Optimization flag to skip alpha numeric tokens for further tokenization")
@OptionalParameter(defaultValue = "false")
Boolean getAlphaNumOpt();
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java
------------------------------------------------------------------------------
svn:mime-type = text/plain