You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/12/01 21:08:58 UTC
svn commit: r1209220 [2/4] - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/dictionary/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/...
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java Thu Dec 1 20:08:25 2011
@@ -25,22 +25,17 @@ import opennlp.tools.chunker.ChunkSample
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.chunker.DefaultChunkerSequenceValidator;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.postag.POSSample;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
-public class ChunkerMETool implements CmdLineTool {
+public class ChunkerMETool extends BaseCLITool {
- public String getName() {
- return "ChunkerME";
- }
-
public String getShortDescription() {
return "learnable chunker";
}
@@ -52,46 +47,45 @@ public class ChunkerMETool implements Cm
public void run(String[] args) {
if (args.length != 1) {
System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- ChunkerModel model = new ChunkerModelLoader().load(new File(args[0]));
-
- ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE,
- new DefaultChunkerSequenceValidator());
-
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
-
- try {
- String line;
- while ((line = lineStream.read()) != null) {
-
- POSSample posSample;
- try {
- posSample = POSSample.parse(line);
- } catch (InvalidFormatException e) {
- System.err.println("Invalid format:");
- System.err.println(line);
- continue;
+ } else {
+ ChunkerModel model = new ChunkerModelLoader().load(new File(args[0]));
+
+ ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE,
+ new DefaultChunkerSequenceValidator());
+
+ ObjectStream<String> lineStream =
+ new PlainTextByLineStream(new InputStreamReader(System.in));
+
+ PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
+
+ try {
+ String line;
+ while ((line = lineStream.read()) != null) {
+
+ POSSample posSample;
+ try {
+ posSample = POSSample.parse(line);
+ } catch (InvalidFormatException e) {
+ System.err.println("Invalid format:");
+ System.err.println(line);
+ continue;
+ }
+
+ String[] chunks = chunker.chunk(posSample.getSentence(),
+ posSample.getTags());
+
+ System.out.println(new ChunkSample(posSample.getSentence(),
+ posSample.getTags(), chunks).nicePrint());
+
+ perfMon.incrementCounter();
}
-
- String[] chunks = chunker.chunk(posSample.getSentence(),
- posSample.getTags());
-
- System.out.println(new ChunkSample(posSample.getSentence(),
- posSample.getTags(), chunks).nicePrint());
-
- perfMon.incrementCounter();
}
- }
- catch (IOException e) {
- CmdLineUtil.handleStdinIoError(e);
+ catch (IOException e) {
+ CmdLineUtil.handleStdinIoError(e);
+ }
+
+ perfMon.stopAndPrintFinalResult();
}
-
- perfMon.stopAndPrintFinalResult();
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java Thu Dec 1 20:08:25 2011
@@ -22,7 +22,6 @@ import java.io.InputStream;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.cmdline.ModelLoader;
-import opennlp.tools.util.InvalidFormatException;
/**
* Loads a Chunker Model for the command line tools.
@@ -36,8 +35,7 @@ public class ChunkerModelLoader extends
}
@Override
- protected ChunkerModel loadModel(InputStream modelIn) throws IOException,
- InvalidFormatException {
+ protected ChunkerModel loadModel(InputStream modelIn) throws IOException {
return new ChunkerModel(modelIn);
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Thu Dec 1 20:08:25 2011
@@ -18,28 +18,27 @@
package opennlp.tools.cmdline.chunker;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.nio.charset.Charset;
import opennlp.tools.chunker.ChunkSample;
-import opennlp.tools.chunker.ChunkSampleStream;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.chunker.DefaultChunkerContextGenerator;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.chunker.ChunkerTrainerTool.TrainerToolParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.model.ModelUtil;
-public class ChunkerTrainerTool implements CmdLineTool {
+public class ChunkerTrainerTool
+ extends AbstractTrainerTool<ChunkSample, TrainerToolParams> {
- interface TrainerToolParams extends TrainingParams, TrainingToolParams{
+ interface TrainerToolParams extends TrainingParams, TrainingToolParams {
+ }
+ public ChunkerTrainerTool() {
+ super(ChunkSample.class, TrainerToolParams.class);
}
public String getName() {
@@ -50,58 +49,24 @@ public class ChunkerTrainerTool implemen
return "trainer for the learnable chunker";
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(TrainerToolParams.class);
- }
-
- static ObjectStream<ChunkSample> openSampleData(String sampleDataName,
- File sampleDataFile, Charset encoding) {
- CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
-
- FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
-
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
- .getChannel(), encoding);
+ public void run(String format, String[] args) {
+ super.run(format, args);
- return new ChunkSampleStream(lineStream);
- }
-
- public void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, TrainerToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
+ mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
+ if(mlParams == null) {
+ mlParams = ModelUtil.createTrainingParameters(params.getIterations(),
+ params.getCutoff());
}
-
- TrainerToolParams params = ArgumentParser.parse(args,
- TrainerToolParams.class);
-
- opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(params.getParams(), false);
-
- File trainingDataInFile = params.getData();
- File modelOutFile = params.getModel();
+ File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("sentence detector model", modelOutFile);
- ObjectStream<ChunkSample> sampleStream =
- openSampleData("Training", trainingDataInFile, params.getEncoding());
-
+
ChunkerModel model;
try {
- if (mlParams == null) {
- model = ChunkerME.train(params.getLang(), sampleStream,
- params.getCutoff(), params.getIterations());
- }
- else {
- model = ChunkerME.train(params.getLang(), sampleStream,
- new DefaultChunkerContextGenerator(), mlParams);
- }
+ model = ChunkerME.train(factory.getLang(), sampleStream,
+ new DefaultChunkerContextGenerator(), mlParams);
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
}
finally {
try {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java Thu Dec 1 20:08:25 2011
@@ -25,21 +25,14 @@ import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.dictionary.Dictionary;
-public class DictionaryBuilderTool implements CmdLineTool {
+public class DictionaryBuilderTool extends BaseCLITool {
interface Params extends DictionaryBuilderParams {
-
- }
-
- public String getName() {
- return "DictionaryBuilder";
}
public String getShortDescription() {
@@ -47,20 +40,11 @@ public class DictionaryBuilderTool imple
}
public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(Params.class);
-
+ return getBasicHelp(Params.class);
}
public void run(String[] args) {
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, Params.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- Params params = ArgumentParser.parse(args, Params.class);
+ Params params = validateAndParseParams(args, Params.class);
File dictInFile = params.getInputFile();
File dictOutFile = params.getOutputFile();
@@ -79,8 +63,7 @@ public class DictionaryBuilderTool imple
dict.serialize(out);
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
} finally {
try {
in.close();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatConverterTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatConverterTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatConverterTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatConverterTool.java Thu Dec 1 20:08:25 2011
@@ -17,38 +17,12 @@
package opennlp.tools.cmdline.doccat;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
import opennlp.tools.cmdline.AbstractConverterTool;
-import opennlp.tools.cmdline.ObjectStreamFactory;
import opennlp.tools.doccat.DocumentSample;
-import opennlp.tools.formats.LeipzigDocumentSampleStreamFactory;
public class DoccatConverterTool extends AbstractConverterTool<DocumentSample> {
- private static final Map<String, ObjectStreamFactory<DocumentSample>> streamFactories;
-
- static {
- Map<String, ObjectStreamFactory<DocumentSample>> mutableStreamFactories =
- new HashMap<String, ObjectStreamFactory<DocumentSample>>();
-
- mutableStreamFactories.put("leipzig", new LeipzigDocumentSampleStreamFactory());
-
- streamFactories = Collections.unmodifiableMap(mutableStreamFactories);
- }
-
- public String getName() {
- return "DoccatConverter";
- }
-
- public String getShortDescription() {
- return "";
- }
-
- @Override
- protected ObjectStreamFactory<DocumentSample> createStreamFactory(String format) {
- return streamFactories.get(format);
+ public DoccatConverterTool() {
+ super(DocumentSample.class);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java Thu Dec 1 20:08:25 2011
@@ -22,7 +22,6 @@ import java.io.InputStream;
import opennlp.tools.cmdline.ModelLoader;
import opennlp.tools.doccat.DoccatModel;
-import opennlp.tools.util.InvalidFormatException;
/**
* Loads a Document Categorizer Model for the command line tools.
@@ -36,8 +35,7 @@ public class DoccatModelLoader extends M
}
@Override
- protected DoccatModel loadModel(InputStream modelIn) throws IOException,
- InvalidFormatException {
+ protected DoccatModel loadModel(InputStream modelIn) throws IOException {
return new DoccatModel(modelIn);
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java Thu Dec 1 20:08:25 2011
@@ -21,11 +21,10 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
@@ -33,12 +32,8 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
-public class DoccatTool implements CmdLineTool {
+public class DoccatTool extends BaseCLITool {
- public String getName() {
- return "Doccat";
- }
-
public String getShortDescription() {
return "learnable document categorizer";
}
@@ -49,37 +44,37 @@ public class DoccatTool implements CmdLi
public void run(String[] args) {
- if (args.length != 1) {
+ if (0 == args.length) {
System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
-
- DocumentCategorizerME doccat = new DocumentCategorizerME(model);
-
- ObjectStream<String> documentStream = new ParagraphStream(
- new PlainTextByLineStream(new InputStreamReader(System.in)));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
- perfMon.start();
-
- try {
- String document;
- while ((document = documentStream.read()) != null) {
- double prob[] = doccat.categorize(document);
- String category = doccat.getBestCategory(prob);
-
- DocumentSample sample = new DocumentSample(category, document);
- System.out.println(sample.toString());
-
- perfMon.incrementCounter();
+ } else {
+
+ DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
+
+ DocumentCategorizerME doccat = new DocumentCategorizerME(model);
+
+ ObjectStream<String> documentStream = new ParagraphStream(
+ new PlainTextByLineStream(new InputStreamReader(System.in)));
+
+ PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
+ perfMon.start();
+
+ try {
+ String document;
+ while ((document = documentStream.read()) != null) {
+ double prob[] = doccat.categorize(document);
+ String category = doccat.getBestCategory(prob);
+
+ DocumentSample sample = new DocumentSample(category, document);
+ System.out.println(sample.toString());
+
+ perfMon.incrementCounter();
+ }
}
+ catch (IOException e) {
+ CmdLineUtil.handleStdinIoError(e);
+ }
+
+ perfMon.stopAndPrintFinalResult();
}
- catch (IOException e) {
- CmdLineUtil.handleStdinIoError(e);
- }
-
- perfMon.stopAndPrintFinalResult();
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Thu Dec 1 20:08:25 2011
@@ -18,88 +18,49 @@
package opennlp.tools.cmdline.doccat;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.nio.charset.Charset;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.doccat.DoccatTrainerTool.TrainerToolParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
-import opennlp.tools.doccat.DocumentSampleStream;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.model.ModelUtil;
-public class DoccatTrainerTool implements CmdLineTool {
+public class DoccatTrainerTool
+ extends AbstractTrainerTool<DocumentSample, TrainerToolParams> {
- interface TrainerToolParams extends TrainingParams, TrainingToolParams{
-
+ interface TrainerToolParams extends TrainingParams, TrainingToolParams {
}
- public String getName() {
- return "DoccatTrainer";
+ public DoccatTrainerTool() {
+ super(DocumentSample.class, TrainerToolParams.class);
}
-
+
public String getShortDescription() {
return "trainer for the learnable document categorizer";
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(TrainerToolParams.class);
- }
-
- static ObjectStream<DocumentSample> openSampleData(String sampleDataName,
- File sampleDataFile, Charset encoding) {
- CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
+ public void run(String format, String[] args) {
+ super.run(format, args);
- FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
-
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
- .getChannel(), encoding);
-
- return new DocumentSampleStream(lineStream);
- }
-
- public void run(String[] args) {
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, TrainerToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
+ mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
+ if(mlParams == null) {
+ mlParams = ModelUtil.createTrainingParameters(params.getIterations(), params.getCutoff());
}
-
- TrainerToolParams params = ArgumentParser.parse(args,
- TrainerToolParams.class);
-
- opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(params.getParams(), false);
-
- File trainingDataInFile = params.getData();
+
File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);
- ObjectStream<DocumentSample> sampleStream =
- openSampleData("Training", trainingDataInFile, params.getEncoding());
-
+
DoccatModel model;
try {
- if (mlParams == null) {
- model = DocumentCategorizerME.train(params.getLang(), sampleStream,
- params.getCutoff(), params.getIterations());
- }
- else {
- model = DocumentCategorizerME.train(params.getLang(), sampleStream,
- mlParams);
- }
+ model = DocumentCategorizerME.train(factory.getLang(), sampleStream, mlParams);
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
}
finally {
try {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java Thu Dec 1 20:08:25 2011
@@ -24,11 +24,9 @@ import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
-import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.dictionary.Dictionary;
@@ -44,7 +42,7 @@ import opennlp.tools.util.StringList;
* <br>
* <a href="http://www.census.gov/genealogy/names/names_files.html">www.census.gov</a>
*/
-public class CensusDictionaryCreatorTool implements CmdLineTool {
+public class CensusDictionaryCreatorTool extends BaseCLITool {
/**
* Create a list of expected parameters.
@@ -66,44 +64,22 @@ public class CensusDictionaryCreatorTool
String getDict();
}
- /**
- * Gets the name for the tool.
- *
- * @return {@code String} a name to be used to call this class.
- */
- public String getName() {
-
- return "CensusDictionaryCreator";
- }
-
- /**
- * Gets a short description for the tool.
- *
- * @return {@code String} a short description describing the purpose of
- * the tool to the user.
- */
public String getShortDescription() {
-
return "Converts 1990 US Census names into a dictionary";
}
- /**
- * Gets the expected usage of the tool as an example.
- *
- * @return {@code String} a descriptive example on how to properly call
- * the tool from the command line.
- */
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " " + ArgumentParser.createUsage(Parameters.class);
+ public String getHelp() {
+ return getBasicHelp(Parameters.class);
}
/**
- *
- * @param sampleStream
+ * Creates a dictionary.
+ *
+ * @param sampleStream stream of samples.
* @return a {@code Dictionary} class containing the name dictionary
* built from the input file.
- * @throws IOException
+ * @throws IOException IOException
*/
public static Dictionary createDictionary(ObjectStream<StringList> sampleStream) throws IOException {
@@ -121,23 +97,8 @@ public class CensusDictionaryCreatorTool
return mNameDictionary;
}
- /**
- * This method is much like the old main() method used in prior class
- * construction, and allows another main class to run() this classes method
- * to perform the operations.
- *
- * @param args a String[] array of arguments passed to the run method
- */
public void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- Parameters params = ArgumentParser.parse(args, Parameters.class);
+ Parameters params = validateAndParseParams(args, Parameters.class);
File testData = new File(params.getCensusData());
File dictOutFile = new File(params.getDict());
@@ -154,8 +115,7 @@ public class CensusDictionaryCreatorTool
System.out.println("Creating Dictionary...");
mDictionary = createDictionary(sampleStream);
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
} finally {
try {
sampleStream.close();
@@ -172,8 +132,7 @@ public class CensusDictionaryCreatorTool
out = new FileOutputStream(dictOutFile);
mDictionary.serialize(out);
} catch (IOException ex) {
- System.err.println("Error during write to dictionary file: " + ex.getMessage());
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while writing dictionary file: " + ex.getMessage());
}
finally {
if (out != null)
@@ -181,9 +140,7 @@ public class CensusDictionaryCreatorTool
out.close();
} catch (IOException e) {
// file might be damaged
- System.err.println("Attention: Failed to correctly write dictionary:");
- System.err.println(e.getMessage());
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + e.getMessage());
}
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderConverterTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderConverterTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderConverterTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderConverterTool.java Thu Dec 1 20:08:25 2011
@@ -17,48 +17,16 @@
package opennlp.tools.cmdline.namefind;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
import opennlp.tools.cmdline.AbstractConverterTool;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.formats.BioNLP2004NameSampleStreamFactory;
-import opennlp.tools.formats.Conll02NameSampleStreamFactory;
-import opennlp.tools.formats.Conll03NameSampleStreamFactory;
-import opennlp.tools.formats.ad.ADNameSampleStreamFactory;
import opennlp.tools.namefind.NameSample;
/**
- * Tool to convert multiple data formats into native opennlp name finder training
+ * Tool to convert multiple data formats into native OpenNLP name finder training
* format.
*/
public class TokenNameFinderConverterTool extends AbstractConverterTool<NameSample> {
- private static final Map<String, ObjectStreamFactory<NameSample>> streamFactories;
-
- static {
- Map<String, ObjectStreamFactory<NameSample>> mutableStreamFactories =
- new HashMap<String, ObjectStreamFactory<NameSample>>();
-
- mutableStreamFactories.put("conll02", new Conll02NameSampleStreamFactory());
- mutableStreamFactories.put("conll03", new Conll03NameSampleStreamFactory());
- mutableStreamFactories.put("ad", new ADNameSampleStreamFactory());
- mutableStreamFactories.put("bionlp2004", new BioNLP2004NameSampleStreamFactory());
-
- streamFactories = Collections.unmodifiableMap(mutableStreamFactories);
- }
-
- public String getName() {
- return "TokenNameFinderConverter";
- }
-
- public String getShortDescription() {
- return "converts foreign data formats to native format";
- }
-
- @Override
- protected ObjectStreamFactory<NameSample> createStreamFactory(String format) {
- return streamFactories.get(format);
+ public TokenNameFinderConverterTool() {
+ super(NameSample.class);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Thu Dec 1 20:08:25 2011
@@ -17,75 +17,51 @@
package opennlp.tools.cmdline.namefind;
-import java.io.File;
import java.io.IOException;
-import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.AbstractCrossValidatorTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderCrossValidatorTool.CVToolParams;
import opennlp.tools.cmdline.params.CVParams;
import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinderCrossValidator;
import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.EvaluationMonitor;
+import opennlp.tools.util.model.ModelUtil;
-public final class TokenNameFinderCrossValidatorTool implements CmdLineTool {
+public final class TokenNameFinderCrossValidatorTool
+ extends AbstractCrossValidatorTool<NameSample, CVToolParams> {
- interface CVToolParams extends TrainingParams, CVParams, DetailedFMeasureEvaluatorParams{
-
+ interface CVToolParams extends TrainingParams, CVParams, DetailedFMeasureEvaluatorParams {
}
- public String getName() {
- return "TokenNameFinderCrossValidator";
+ public TokenNameFinderCrossValidatorTool() {
+ super(NameSample.class, CVToolParams.class);
}
public String getShortDescription() {
return "K-fold cross validator for the learnable Name Finder";
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(CVToolParams.class);
- }
+ public void run(String format, String[] args) {
+ super.run(format, args);
- public void run(String[] args) {
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, CVToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
+ mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
+ if (mlParams == null) {
+ mlParams = ModelUtil.createTrainingParameters(params.getIterations(), params.getCutoff());
}
-
- CVToolParams params = ArgumentParser.parse(args, CVToolParams.class);
-
- opennlp.tools.util.TrainingParameters mlParams = CmdLineUtil
- .loadTrainingParameters(params.getParams(),false);
-
- byte featureGeneratorBytes[] = TokenNameFinderTrainerTool
- .openFeatureGeneratorBytes(params.getFeaturegen());
-
- Map<String, Object> resources = TokenNameFinderTrainerTool
- .loadResources(params.getResources());
- File trainingDataInFile = params.getData();
- CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
-
- Charset encoding = params.getEncoding();
+ byte featureGeneratorBytes[] =
+ TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
- ObjectStream<NameSample> sampleStream = TokenNameFinderTrainerTool
- .openSampleData("Training Data", trainingDataInFile, encoding);
+ Map<String, Object> resources =
+ TokenNameFinderTrainerTool.loadResources(params.getResources());
- TokenNameFinderCrossValidator validator;
-
List<EvaluationMonitor<NameSample>> listeners = new LinkedList<EvaluationMonitor<NameSample>>();
if (params.getMisclassified()) {
listeners.add(new NameEvaluationErrorListener());
@@ -95,23 +71,15 @@ public final class TokenNameFinderCrossV
detailedFListener = new TokenNameFinderDetailedFMeasureListener();
listeners.add(detailedFListener);
}
-
- if (mlParams == null) {
- mlParams = new TrainingParameters();
- mlParams.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
- mlParams.put(TrainingParameters.ITERATIONS_PARAM,
- Integer.toString(params.getIterations()));
- mlParams.put(TrainingParameters.CUTOFF_PARAM,
- Integer.toString(params.getCutoff()));
- }
+ TokenNameFinderCrossValidator validator;
try {
- validator = new TokenNameFinderCrossValidator(params.getLang(),
- params.getType(), mlParams, featureGeneratorBytes, resources, listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
+ validator = new TokenNameFinderCrossValidator(factory.getLang(),
+ params.getType(), mlParams, featureGeneratorBytes, resources,
+ listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
validator.evaluate(sampleStream, params.getFolds());
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
} finally {
try {
sampleStream.close();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java Thu Dec 1 20:08:25 2011
@@ -17,18 +17,14 @@
package opennlp.tools.cmdline.namefind;
-import java.io.File;
import java.io.IOException;
-import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
-import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.AbstractEvaluatorTool;
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderEvaluatorTool.EvalToolParams;
import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams;
import opennlp.tools.cmdline.params.EvaluatorParams;
import opennlp.tools.namefind.NameFinderME;
@@ -39,44 +35,24 @@ import opennlp.tools.namefind.TokenNameF
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.eval.EvaluationMonitor;
-public final class TokenNameFinderEvaluatorTool implements CmdLineTool {
+public final class TokenNameFinderEvaluatorTool
+ extends AbstractEvaluatorTool<NameSample, EvalToolParams> {
interface EvalToolParams extends EvaluatorParams, DetailedFMeasureEvaluatorParams {
-
- }
-
- public String getName() {
- return "TokenNameFinderEvaluator";
}
- public String getShortDescription() {
- return "";
+ public TokenNameFinderEvaluatorTool() {
+ super(NameSample.class, EvalToolParams.class);
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(EvalToolParams.class);
+ public String getShortDescription() {
+ return "Measures the performance of the NameFinder model with the reference data";
}
- public void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, EvalToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- EvalToolParams params = ArgumentParser.parse(args,
- EvalToolParams.class);
-
- File testData = params.getData();
- CmdLineUtil.checkInputFile("Test data", testData);
+ public void run(String format, String[] args) {
+ super.run(format, args);
- Charset encoding = params.getEncoding();
-
- TokenNameFinderModel model = new TokenNameFinderModelLoader().load(params
- .getModel());
+ TokenNameFinderModel model = new TokenNameFinderModelLoader().load(params.getModel());
List<EvaluationMonitor<NameSample>> listeners = new LinkedList<EvaluationMonitor<NameSample>>();
if (params.getMisclassified()) {
@@ -92,9 +68,6 @@ public final class TokenNameFinderEvalua
new NameFinderME(model),
listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
- final ObjectStream<NameSample> sampleStream = TokenNameFinderTrainerTool.openSampleData("Test",
- testData, encoding);
-
final PerformanceMonitor monitor = new PerformanceMonitor("sent");
ObjectStream<NameSample> measuredSampleStream = new ObjectStream<NameSample>() {
@@ -119,8 +92,7 @@ public final class TokenNameFinderEvalua
evaluator.evaluate(measuredSampleStream);
} catch (IOException e) {
System.err.println("failed");
- System.err.println("Reading test data error " + e.getMessage());
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading test data: " + e.getMessage());
} finally {
try {
measuredSampleStream.close();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java Thu Dec 1 20:08:25 2011
@@ -24,11 +24,10 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinder;
@@ -38,12 +37,8 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
-public final class TokenNameFinderTool implements CmdLineTool {
+public final class TokenNameFinderTool extends BaseCLITool {
- public String getName() {
- return "TokenNameFinder";
- }
-
public String getShortDescription() {
return "learnable name finder";
}
@@ -56,59 +51,59 @@ public final class TokenNameFinderTool i
if (args.length == 0) {
System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- NameFinderME nameFinders[] = new NameFinderME[args.length];
-
- for (int i = 0; i < nameFinders.length; i++) {
- TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
- nameFinders[i] = new NameFinderME(model);
- }
-
- ObjectStream<String> untokenizedLineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
+ } else {
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
-
- try {
- String line;
- while((line = untokenizedLineStream.read()) != null) {
- String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
-
- // A new line indicates a new document,
- // adaptive data must be cleared for a new document
-
- if (whitespaceTokenizerLine.length == 0) {
- for (NameFinderME nameFinder : nameFinders) {
- nameFinder.clearAdaptiveData();
+ NameFinderME nameFinders[] = new NameFinderME[args.length];
+
+ for (int i = 0; i < nameFinders.length; i++) {
+ TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
+ nameFinders[i] = new NameFinderME(model);
+ }
+
+ ObjectStream<String> untokenizedLineStream =
+ new PlainTextByLineStream(new InputStreamReader(System.in));
+
+ PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
+
+ try {
+ String line;
+ while((line = untokenizedLineStream.read()) != null) {
+ String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+
+ // A new line indicates a new document,
+ // adaptive data must be cleared for a new document
+
+ if (whitespaceTokenizerLine.length == 0) {
+ for (NameFinderME nameFinder : nameFinders) {
+ nameFinder.clearAdaptiveData();
+ }
}
+
+ List<Span> names = new ArrayList<Span>();
+
+ for (TokenNameFinder nameFinder : nameFinders) {
+ Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
+ }
+
+ // Simple way to drop intersecting spans, otherwise the
+ // NameSample is invalid
+ Span reducedNames[] = NameFinderME.dropOverlappingSpans(
+ names.toArray(new Span[names.size()]));
+
+ NameSample nameSample = new NameSample(whitespaceTokenizerLine,
+ reducedNames, false);
+
+ System.out.println(nameSample.toString());
+
+ perfMon.incrementCounter();
}
-
- List<Span> names = new ArrayList<Span>();
-
- for (TokenNameFinder nameFinder : nameFinders) {
- Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
- }
-
- // Simple way to drop intersecting spans, otherwise the
- // NameSample is invalid
- Span reducedNames[] = NameFinderME.dropOverlappingSpans(
- names.toArray(new Span[names.size()]));
-
- NameSample nameSample = new NameSample(whitespaceTokenizerLine,
- reducedNames, false);
-
- System.out.println(nameSample.toString());
-
- perfMon.incrementCounter();
}
+ catch (IOException e) {
+ CmdLineUtil.handleStdinIoError(e);
+ }
+
+ perfMon.stopAndPrintFinalResult();
}
- catch (IOException e) {
- CmdLineUtil.handleStdinIoError(e);
- }
-
- perfMon.stopAndPrintFinalResult();
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Thu Dec 1 20:08:25 2011
@@ -18,62 +18,36 @@
package opennlp.tools.cmdline.namefind;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool.TrainerToolParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.namefind.NameSampleDataStream;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.ModelUtil;
-/**
- * <b>Note:</b> Do not use this class, internal use only!
- */
-public final class TokenNameFinderTrainerTool implements CmdLineTool {
+public final class TokenNameFinderTrainerTool
+ extends AbstractTrainerTool<NameSample, TrainerToolParams> {
- interface TrainerToolParams extends TrainingParams, TrainingToolParams{
-
+ interface TrainerToolParams extends TrainingParams, TrainingToolParams {
}
- public String getName() {
- return "TokenNameFinderTrainer";
+ public TokenNameFinderTrainerTool() {
+ super(NameSample.class, TrainerToolParams.class);
}
-
+
public String getShortDescription() {
return "trainer for the learnable name finder";
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(TrainerToolParams.class);
- }
-
- static ObjectStream<NameSample> openSampleData(String sampleDataName,
- File sampleDataFile, Charset encoding) {
- CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
-
- FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
-
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
- .getChannel(), encoding);
-
- return new NameSampleDataStream(lineStream);
- }
-
static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) {
if(featureGenDescriptorFile != null) {
return openFeatureGeneratorBytes(new File(featureGenDescriptorFile));
@@ -90,8 +64,7 @@ public final class TokenNameFinderTraine
try {
featureGeneratorBytes = ModelUtil.read(bytesIn);
} catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
} finally {
try {
bytesIn.close();
@@ -168,54 +141,35 @@ public final class TokenNameFinderTraine
return new HashMap<String, Object>();
}
- public void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, TrainerToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
+ public void run(String format, String[] args) {
+ super.run(format, args);
+
+ mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
+ if(mlParams == null) {
+ mlParams = ModelUtil.createTrainingParameters(params.getIterations(), params.getCutoff());
}
-
- TrainerToolParams params = ArgumentParser.parse(args,
- TrainerToolParams.class);
-
- opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(params.getParams(), true);
-
- File trainingDataInFile = params.getData();
+
File modelOutFile = params.getModel();
-
-
+
byte featureGeneratorBytes[] = openFeatureGeneratorBytes(params.getFeaturegen());
-
- // TODO: Support Custom resources:
+
+ // TODO: Support Custom resources:
// Must be loaded into memory, or written to tmp file until descriptor
// is loaded which defines parses when model is loaded
Map<String, Object> resources = loadResources(params.getResources());
CmdLineUtil.checkOutputFile("name finder model", modelOutFile);
- ObjectStream<NameSample> sampleStream = openSampleData("Training", trainingDataInFile,
- params.getEncoding());
TokenNameFinderModel model;
try {
- if (mlParams == null) {
- model = opennlp.tools.namefind.NameFinderME.train(params.getLang(), params.getType(),
- sampleStream, featureGeneratorBytes, resources, params.getIterations(),
- params.getCutoff());
- }
- else {
- model = opennlp.tools.namefind.NameFinderME.train(
- params.getLang(), params.getType(), sampleStream,
- mlParams, featureGeneratorBytes, resources);
- }
- }
+ model = opennlp.tools.namefind.NameFinderME.train(
+ factory.getLang(), params.getType(), sampleStream,
+ mlParams, featureGeneratorBytes, resources);
+ }
catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
}
finally {
try {
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.params;
+
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+
+import java.io.File;
+
+/**
+ * Common format parameters.
+ */
+public interface BasicFormatParams extends EncodingParameter {
+
+ @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
+ File getData();
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java Thu Dec 1 20:08:25 2011
@@ -20,27 +20,22 @@ package opennlp.tools.cmdline.params;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
-// TODO: remove the old BasicTrainingParameters and rename this class to BasicTrainingParameters
-
/**
* Common training parameters.
*
* Note: Do not use this class, internal use only!
*/
-public interface BasicTrainingParams extends EncodingParameter{
+public interface BasicTrainingParams {
- @ParameterDescription(valueName = "language", description = "specifies the language which is being processed.")
- String getLang();
-
- @ParameterDescription(valueName = "num", description = "specifies the number of training iterations. It is ignored if a parameters file is passed.")
+ @ParameterDescription(valueName = "num", description = "number of training iterations, ignored if -params is used.")
@OptionalParameter(defaultValue="100")
Integer getIterations();
- @ParameterDescription(valueName = "num", description = "specifies the min number of times a feature must be seen. It is ignored if a parameters file is passed.")
+ @ParameterDescription(valueName = "num", description = "minimal number of times a feature must be seen, ignored if -params is used.")
@OptionalParameter(defaultValue="5")
Integer getCutoff();
- @ParameterDescription(valueName = "paramsFile", description = "Training parameters file.")
+ @ParameterDescription(valueName = "paramsFile", description = "training parameters file.")
@OptionalParameter()
String getParams();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java Thu Dec 1 20:08:25 2011
@@ -17,8 +17,6 @@
package opennlp.tools.cmdline.params;
-import java.io.File;
-
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
@@ -29,14 +27,12 @@ import opennlp.tools.cmdline.ArgumentPar
*/
public interface CVParams {
- @ParameterDescription(valueName = "testData", description = "the data to be used during evaluation")
- File getData();
-
- @ParameterDescription(valueName = "true|false", description = "if true will print false negatives and false positives")
+ @ParameterDescription(valueName = "true|false",
+ description = "if true will print false negatives and false positives.")
@OptionalParameter(defaultValue="false")
Boolean getMisclassified();
- @ParameterDescription(valueName = "num", description = "The number of folds. Default is 10")
+ @ParameterDescription(valueName = "num", description = "number of folds, default is 10.")
@OptionalParameter(defaultValue="10")
Integer getFolds();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java Thu Dec 1 20:08:25 2011
@@ -28,7 +28,8 @@ import opennlp.tools.cmdline.ArgumentPar
*/
public interface DetailedFMeasureEvaluatorParams {
- @ParameterDescription(valueName = "true|false", description = "if true will print detailed FMeasure results")
+ @ParameterDescription(valueName = "true|false",
+ description = "if true will print detailed FMeasure results.")
@OptionalParameter(defaultValue="false")
Boolean getDetailedF();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetokenizerParameter.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetokenizerParameter.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetokenizerParameter.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetokenizerParameter.java Thu Dec 1 20:08:25 2011
@@ -20,6 +20,7 @@ package opennlp.tools.cmdline.params;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
public interface DetokenizerParameter {
- @ParameterDescription(valueName = "dictionary")
+ @ParameterDescription(valueName = "dictionary",
+ description = "specifies the file with detokenizer dictionary.")
String getDetokenizer();
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java Thu Dec 1 20:08:25 2011
@@ -29,9 +29,8 @@ import opennlp.tools.cmdline.ArgumentPar
*/
public interface EncodingParameter {
- @ParameterDescription(valueName = "charsetName", description = "specifies the "
- + "encoding which should be used for reading and writing text. If not specified "
- + "the system default will be used.")
+ @ParameterDescription(valueName = "charsetName",
+ description = "encoding for reading and writing text, if absent the system default is used.")
@OptionalParameter(defaultValue = OptionalParameter.DEFAULT_CHARSET)
Charset getEncoding();
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java Thu Dec 1 20:08:25 2011
@@ -27,15 +27,13 @@ import opennlp.tools.cmdline.ArgumentPar
*
* Note: Do not use this class, internal use only!
*/
-public interface EvaluatorParams extends EncodingParameter{
+public interface EvaluatorParams {
- @ParameterDescription(valueName = "model", description = "the model file to be evaluated")
+ @ParameterDescription(valueName = "model", description = "the model file to be evaluated.")
File getModel();
- @ParameterDescription(valueName = "testData", description = "the data to be used during evaluation")
- File getData();
-
- @ParameterDescription(valueName = "true|false", description = "if true will print false negatives and false positives")
+ @ParameterDescription(valueName = "true|false",
+ description = "if true will print false negatives and false positives.")
@OptionalParameter(defaultValue="false")
Boolean getMisclassified();
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageFormatParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageFormatParams.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageFormatParams.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageFormatParams.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.params;
+
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+
+/**
+ * Parameters with a language parameter.
+ */
+public interface LanguageFormatParams extends BasicFormatParams {
+
+ @ParameterDescription(valueName = "language", description = "language which is being processed.")
+ String getLang();
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageFormatParams.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java Thu Dec 1 20:08:25 2011
@@ -21,19 +21,13 @@ import java.io.File;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
-// TODO: remove the old BasicTrainingParameters and rename this class to BasicTrainingParameters
-
/**
* Common training parameters.
*
* Note: Do not use this class, internal use only!
*/
-public interface TrainingToolParams extends BasicTrainingParams{
-
- @ParameterDescription(valueName = "trainData", description = "the data to be used during training")
- File getData();
+public interface TrainingToolParams extends BasicTrainingParams {
- @ParameterDescription(valueName = "modelFile", description = "the output model file")
+ @ParameterDescription(valueName = "modelFile", description = "output model file.")
File getModel();
-
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java Thu Dec 1 20:08:25 2011
@@ -30,10 +30,6 @@ import opennlp.tools.util.ObjectStream;
public final class BuildModelUpdaterTool extends ModelUpdaterTool {
- public String getName() {
- return "BuildModelUpdater";
- }
-
public String getShortDescription() {
return "trains and updates the build model in a parser model";
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java Thu Dec 1 20:08:25 2011
@@ -31,10 +31,6 @@ import opennlp.tools.util.ObjectStream;
// trains a new check model ...
public final class CheckModelUpdaterTool extends ModelUpdaterTool {
- public String getName() {
- return "CheckModelUpdater";
- }
-
public String getShortDescription() {
return "trains and updates the check model in a parser model";
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java Thu Dec 1 20:08:25 2011
@@ -20,10 +20,10 @@ package opennlp.tools.cmdline.parser;
import java.io.File;
import java.io.IOException;
+import opennlp.tools.cmdline.AbstractTypedTool;
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.ObjectStreamFactory;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.parser.Parse;
@@ -33,52 +33,43 @@ import opennlp.tools.util.ObjectStream;
/**
* Abstract base class for tools which update the parser model.
*/
-abstract class ModelUpdaterTool implements CmdLineTool {
+abstract class ModelUpdaterTool
+ extends AbstractTypedTool<Parse, ModelUpdaterTool.ModelUpdaterParams> {
interface ModelUpdaterParams extends TrainingToolParams {
+ }
+ protected ModelUpdaterTool() {
+ super(Parse.class, ModelUpdaterParams.class);
}
protected abstract ParserModel trainAndUpdate(ParserModel originalModel,
ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters)
throws IOException;
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(ModelUpdaterParams.class);
- }
-
- public final void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, ModelUpdaterParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- ModelUpdaterParams params = ArgumentParser.parse(args,
- ModelUpdaterParams.class);
+ public final void run(String format, String[] args) {
+ ModelUpdaterParams params = validateAndParseParams(
+ ArgumentParser.filter(args, ModelUpdaterParams.class), ModelUpdaterParams.class);
// Load model to be updated
File modelFile = params.getModel();
ParserModel originalParserModel = new ParserModelLoader().load(modelFile);
- ObjectStream<Parse> parseSamples = ParserTrainerTool.openTrainingData(params.getData(),
- params.getEncoding());
+ ObjectStreamFactory<Parse> factory = getStreamFactory(format);
+ String[] fargs = ArgumentParser.filter(args, factory.getParameters());
+ validateFactoryArgs(factory, fargs);
+ ObjectStream<Parse> sampleStream = factory.create(fargs);
ParserModel updatedParserModel;
try {
- updatedParserModel = trainAndUpdate(originalParserModel,
- parseSamples, params);
+ updatedParserModel = trainAndUpdate(originalParserModel, sampleStream, params);
}
catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
}
finally {
try {
- parseSamples.close();
+ sampleStream.close();
} catch (IOException e) {
// sorry that this can fail
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java Thu Dec 1 20:08:25 2011
@@ -26,11 +26,10 @@ import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserFactory;
@@ -39,12 +38,8 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
-public final class ParserTool implements CmdLineTool {
+public final class ParserTool extends BaseCLITool {
- public String getName() {
- return "Parser";
- }
-
public String getShortDescription() {
return "performs full syntactic parsing";
}
@@ -93,64 +88,64 @@ public final class ParserTool implements
if (args.length < 1) {
System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- ParserModel model = new ParserModelLoader().load(new File(args[args.length - 1]));
-
- Integer beamSize = CmdLineUtil.getIntParameter("-bs", args);
- if (beamSize == null)
- beamSize = AbstractBottomUpParser.defaultBeamSize;
-
- Integer numParses = CmdLineUtil.getIntParameter("-k", args);
- boolean showTopK;
- if (numParses == null) {
- numParses = 1;
- showTopK = false;
- }
- else {
- showTopK = true;
- }
+ } else {
- Double advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args);
-
- if (advancePercentage == null)
- advancePercentage = AbstractBottomUpParser.defaultAdvancePercentage;
-
- opennlp.tools.parser.Parser parser =
- ParserFactory.create(model, beamSize, advancePercentage);
+ ParserModel model = new ParserModelLoader().load(new File(args[args.length - 1]));
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
-
- try {
- String line;
- while ((line = lineStream.read()) != null) {
- if (line.length() == 0) {
- System.out.println();
- }
- else {
- Parse[] parses = parseLine(line, parser, numParses);
-
- for (int pi=0,pn=parses.length;pi<pn;pi++) {
- if (showTopK) {
- System.out.print(pi+" "+parses[pi].getProb()+" ");
+ Integer beamSize = CmdLineUtil.getIntParameter("-bs", args);
+ if (beamSize == null)
+ beamSize = AbstractBottomUpParser.defaultBeamSize;
+
+ Integer numParses = CmdLineUtil.getIntParameter("-k", args);
+ boolean showTopK;
+ if (numParses == null) {
+ numParses = 1;
+ showTopK = false;
+ }
+ else {
+ showTopK = true;
+ }
+
+ Double advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args);
+
+ if (advancePercentage == null)
+ advancePercentage = AbstractBottomUpParser.defaultAdvancePercentage;
+
+ opennlp.tools.parser.Parser parser =
+ ParserFactory.create(model, beamSize, advancePercentage);
+
+ ObjectStream<String> lineStream =
+ new PlainTextByLineStream(new InputStreamReader(System.in));
+
+ PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
+
+ try {
+ String line;
+ while ((line = lineStream.read()) != null) {
+ if (line.length() == 0) {
+ System.out.println();
+ }
+ else {
+ Parse[] parses = parseLine(line, parser, numParses);
+
+ for (int pi=0,pn=parses.length;pi<pn;pi++) {
+ if (showTopK) {
+ System.out.print(pi+" "+parses[pi].getProb()+" ");
+ }
+
+ parses[pi].show();
+
+ perfMon.incrementCounter();
}
-
- parses[pi].show();
-
- perfMon.incrementCounter();
}
}
}
- }
- catch (IOException e) {
- CmdLineUtil.handleStdinIoError(e);
+ catch (IOException e) {
+ CmdLineUtil.handleStdinIoError(e);
+ }
+
+ perfMon.stopAndPrintFinalResult();
}
-
- perfMon.stopAndPrintFinalResult();
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java Thu Dec 1 20:08:25 2011
@@ -15,74 +15,42 @@
* limitations under the License.
*/
-
package opennlp.tools.cmdline.parser;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
import opennlp.model.TrainUtil;
-import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.EncodingParameter;
import opennlp.tools.cmdline.params.TrainingToolParams;
+import opennlp.tools.cmdline.parser.ParserTrainerTool.TrainerToolParams;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.ParseSampleStream;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.ParserType;
import opennlp.tools.parser.chunking.Parser;
import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.model.ModelUtil;
-public final class ParserTrainerTool implements CmdLineTool {
+public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerToolParams> {
- interface TrainerToolParams extends TrainingParams, TrainingToolParams{
-
+ interface TrainerToolParams extends TrainingParams, TrainingToolParams, EncodingParameter {
}
- public String getName() {
- return "ParserTrainer";
+ public ParserTrainerTool() {
+ super(Parse.class, TrainerToolParams.class);
}
-
+
public String getShortDescription() {
return "trains the learnable parser";
}
- public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " "
- + ArgumentParser.createUsage(TrainerToolParams.class);
- }
-
- static ObjectStream<Parse> openTrainingData(File trainingDataFile, Charset encoding) {
-
- CmdLineUtil.checkInputFile("Training data", trainingDataFile);
-
- System.err.print("Opening training data ... ");
-
- FileInputStream trainingDataIn;
- try {
- trainingDataIn = new FileInputStream(trainingDataFile);
- } catch (FileNotFoundException e) {
- System.err.println("failed");
- System.err.println("File not found: " + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- System.err.println("done");
-
- return new ParseSampleStream(
- new PlainTextByLineStream(trainingDataIn.getChannel(),
- encoding));
- }
-
static Dictionary buildDictionary(ObjectStream<Parse> parseSamples, HeadRules headRules, int cutoff) {
System.err.print("Building dictionary ...");
@@ -104,8 +72,7 @@ public final class ParserTrainerTool imp
if(typeAsString != null && typeAsString.length() > 0) {
type = ParserType.parse(typeAsString);
if(type == null) {
- System.err.println("ParserType training parameter is invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "ParserType training parameter is invalid!");
}
}
@@ -113,95 +80,65 @@ public final class ParserTrainerTool imp
}
// TODO: Add param to train tree insert parser
- public void run(String[] args) {
-
- String errorMessage = ArgumentParser.validateArgumentsLoudly(args, TrainerToolParams.class);
- if (null != errorMessage) {
- System.err.println(errorMessage);
- System.err.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- TrainerToolParams params = ArgumentParser.parse(args,
- TrainerToolParams.class);
-
- opennlp.tools.util.TrainingParameters mlParams =
- CmdLineUtil.loadTrainingParameters(params.getParams(), true);
+ public void run(String format, String[] args) {
+ super.run(format, args);
+
+ mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
if (mlParams != null) {
if (!TrainUtil.isValid(mlParams.getSettings("build"))) {
- System.err.println("Build training parameters are invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "Build training parameters are invalid!");
}
if (!TrainUtil.isValid(mlParams.getSettings("check"))) {
- System.err.println("Check training parameters are invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "Check training parameters are invalid!");
}
if (!TrainUtil.isValid(mlParams.getSettings("attach"))) {
- System.err.println("Attach training parameters are invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "Attach training parameters are invalid!");
}
if (!TrainUtil.isValid(mlParams.getSettings("tagger"))) {
- System.err.println("Tagger training parameters are invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "Tagger training parameters are invalid!");
}
if (!TrainUtil.isValid(mlParams.getSettings("chunker"))) {
- System.err.println("Chunker training parameters are invalid!");
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(1, "Chunker training parameters are invalid!");
}
}
-
- ObjectStream<Parse> sampleStream = openTrainingData(params.getData(), params.getEncoding());
-
+
+ if(mlParams == null) {
+ mlParams = ModelUtil.createTrainingParameters(params.getIterations(), params.getCutoff());
+ }
+
File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("parser model", modelOutFile);
ParserModel model;
try {
-
+
+ // TODO hard-coded language reference
HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(
new InputStreamReader(new FileInputStream(params.getHeadRules()),
params.getEncoding()));
ParserType type = parseParserType(params.getParserType());
- if (mlParams == null) {
- if (ParserType.CHUNKING.equals(type)) {
- model = opennlp.tools.parser.chunking.Parser.train(
- params.getLang(), sampleStream, rules,
- params.getIterations(), params.getCutoff());
- }
- else if (ParserType.TREEINSERT.equals(type)) {
- model = opennlp.tools.parser.treeinsert.Parser.train(params.getLang(), sampleStream, rules, params.getIterations(),
- params.getCutoff());
- }
- else {
- throw new IllegalStateException();
- }
+ if (ParserType.CHUNKING.equals(type)) {
+ model = opennlp.tools.parser.chunking.Parser.train(
+ factory.getLang(), sampleStream, rules,
+ mlParams);
+ }
+ else if (ParserType.TREEINSERT.equals(type)) {
+ model = opennlp.tools.parser.treeinsert.Parser.train(factory.getLang(), sampleStream, rules,
+ mlParams);
}
else {
- if (ParserType.CHUNKING.equals(type)) {
- model = opennlp.tools.parser.chunking.Parser.train(
- params.getLang(), sampleStream, rules,
- mlParams);
- }
- else if (ParserType.TREEINSERT.equals(type)) {
- model = opennlp.tools.parser.treeinsert.Parser.train(params.getLang(), sampleStream, rules,
- mlParams);
- }
- else {
- throw new IllegalStateException();
- }
-
+ throw new IllegalStateException();
}
}
catch (IOException e) {
- CmdLineUtil.printTrainingIoError(e);
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage());
}
finally {
try {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java Thu Dec 1 20:08:25 2011
@@ -19,21 +19,16 @@ package opennlp.tools.cmdline.parser;
import java.io.File;
+import opennlp.tools.cmdline.BaseCLITool;
import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.postag.POSModel;
// user should train with the POS tool
-public final class TaggerModelReplacerTool implements CmdLineTool {
+public final class TaggerModelReplacerTool extends BaseCLITool {
- public String getName() {
- return "TaggerModelReplacer";
- }
-
public String getShortDescription() {
return "replaces the tagger model in a parser model";
}
@@ -46,17 +41,17 @@ public final class TaggerModelReplacerTo
if (args.length != 2) {
System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- File parserModelInFile = new File(args[0]);
- ParserModel parserModel = new ParserModelLoader().load(parserModelInFile);
-
- File taggerModelInFile = new File(args[1]);
- POSModel taggerModel = new POSModelLoader().load(taggerModelInFile);
+ } else {
- ParserModel updatedParserModel = parserModel.updateTaggerModel(taggerModel);
-
- CmdLineUtil.writeModel("parser", parserModelInFile, updatedParserModel);
+ File parserModelInFile = new File(args[0]);
+ ParserModel parserModel = new ParserModelLoader().load(parserModelInFile);
+
+ File taggerModelInFile = new File(args[1]);
+ POSModel taggerModel = new POSModelLoader().load(taggerModelInFile);
+
+ ParserModel updatedParserModel = parserModel.updateTaggerModel(taggerModel);
+
+ CmdLineUtil.writeModel("parser", parserModelInFile, updatedParserModel);
+ }
}
}