You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/19 12:11:45 UTC
svn commit: r1124657 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
chunker/ namefind/ parser/chunking/ parser/treeinsert/ postag/ sentdetect/
tokenize/
Author: joern
Date: Thu May 19 10:11:45 2011
New Revision: 1124657
URL: http://svn.apache.org/viewvc?rev=1124657&view=rev
Log:
OPENNLP-180 Removed old main methods
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java Thu May 19 10:11:45 2011
@@ -259,76 +259,4 @@ public class ChunkerME implements Chunke
throws IOException, ObjectStreamException {
return train(lang, in, cutoff, iterations, new DefaultChunkerContextGenerator());
}
-
- @Deprecated
- private static void usage() {
- System.err.println("Usage: ChunkerME [-encoding charset] trainingFile modelFile");
- System.err.println();
- System.err.println("Training file should be one word per line where each line consists of a ");
- System.err.println("space-delimited triple of \"word pos outcome\". Sentence breaks are indicated by blank lines.");
- System.exit(1);
- }
-
- /**
- * Trains the chunker using the specified parameters. <br>
- * Usage: ChunkerME trainingFile modelFile. <br>
- * Training file should be one word per line where each line consists of a
- * space-delimited triple of "word pos outcome". Sentence breaks are indicated by blank lines.
- * @param args The training file and the model file.
- * @throws IOException When the specified files can not be read.
- */
- @Deprecated
- public static void main(String[] args) throws IOException, ObjectStreamException {
- if (args.length == 0) {
- usage();
- }
- int ai = 0;
- String encoding = null;
- while (args[ai].startsWith("-")) {
- if (args[ai].equals("-encoding") && ai+1 < args.length) {
- ai++;
- encoding = args[ai];
- }
- else {
- System.err.println("Unknown option: "+args[ai]);
- usage();
- }
- ai++;
- }
- java.io.File inFile = null;
- java.io.File outFile = null;
- if (ai < args.length) {
- inFile = new java.io.File(args[ai++]);
- }
- else {
- usage();
- }
- if (ai < args.length) {
- outFile = new java.io.File(args[ai++]);
- }
- else {
- usage();
- }
- int iterations = 100;
- int cutoff = 5;
- if (args.length > ai) {
- iterations = Integer.parseInt(args[ai++]);
- }
- if (args.length > ai) {
- cutoff = Integer.parseInt(args[ai++]);
- }
- ChunkerModel mod;
- ObjectStream<ChunkSample> es;
- if (encoding != null) {
- es = new ChunkSampleStream(new PlainTextByLineStream(new InputStreamReader(new FileInputStream(inFile),encoding)));
- }
- else {
- es = new ChunkSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile)));
- }
- mod = train("en", es, cutoff, iterations);
- System.out.println("Saving the model as: " + args[1]);
- OutputStream out = new FileOutputStream(outFile);
- mod.serialize(out);
- out.close();
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Thu May 19 10:11:45 2011
@@ -453,33 +453,4 @@ public class NameFinderME implements Tok
return sortedSpans.toArray(new Span[sortedSpans.size()]);
}
-
- /**
- * Trains a new named entity model on the specified training file using the specified encoding to read it in.
- *
- * @param args [-encoding encoding] training_file model_file
- *
- * @throws java.io.IOException
- */
- @Deprecated
- public static void main(String[] args) throws IOException {
-
- // Encoding must be specified !!!
- // -encoding code train.file model.file
-
- if (args.length == 4) {
-
- NameSampleDataStream sampleStream = new NameSampleDataStream(
- new PlainTextByLineStream(new InputStreamReader(new FileInputStream(args[2]), args[1])));
-
- TokenNameFinderModel model =
- NameFinderME.train("x-unspecified", "default", sampleStream, new HashMap<String, Object>());
-
- model.serialize(new FileOutputStream(args[4]));
-
- }
- else {
- // TODO: Usage
- }
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java Thu May 19 10:11:45 2011
@@ -374,132 +374,4 @@ public class Parser extends AbstractBott
posModel, chunkModel, (opennlp.tools.parser.lang.en.HeadRules) rules,
ParserType.CHUNKING, manifestInfoEntries);
}
-
- @Deprecated
- private static void usage() {
- System.err.println("Usage: Parser -[dict|tag|chunk|build|check|fun] trainingFile parserModelDirectory [iterations cutoff]");
- System.err.println();
- System.err.println("Training file should be one sentence per line where each line consists of a Penn Treebank Style parse");
- System.err.println("-dict Just build the dictionaries.");
- System.err.println("-tag Just build the tagging model.");
- System.err.println("-chunk Just build the chunking model.");
- System.err.println("-build Just build the build model");
- System.err.println("-check Just build the check model");
- System.err.println("-fun Predict function tags");
- }
-
-
-
- @Deprecated
- public static void main(String[] args) throws IOException, InvalidFormatException {
- if (args.length < 2) {
- usage();
- System.exit(1);
- }
- boolean dict = false;
- boolean tag = false;
- boolean chunk = false;
- boolean build = false;
- boolean check = false;
- boolean fun = false;
- boolean all = true;
- int argIndex = 0;
- while (args[argIndex].startsWith("-")) {
- all = false;
- if (args[argIndex].equals("-dict")) {
- dict = true;
- }
- else if (args[argIndex].equals("-tag")) {
- tag = true;
- }
- else if (args[argIndex].equals("-chunk")) {
- chunk = true;
- }
- else if (args[argIndex].equals("-build")) {
- build = true;
- }
- else if (args[argIndex].equals("-check")) {
- check = true;
- }
- else if (args[argIndex].equals("-fun")) {
- fun = true;
- }
- else if (args[argIndex].equals("--")) {
- argIndex++;
- break;
- }
- else {
- System.err.println("Invalid option " + args[argIndex]);
- usage();
- System.exit(1);
- }
- argIndex++;
- }
- java.io.File inFile = new java.io.File(args[argIndex++]);
- String modelDirectory = args[argIndex++];
- HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(modelDirectory+"/head_rules");
- java.io.File dictFile = new java.io.File(modelDirectory+"/dict.bin.gz");
- java.io.File tagFile = new java.io.File(modelDirectory+"/tag.bin.gz");
- java.io.File chunkFile = new java.io.File(modelDirectory+"/chunk.bin.gz");
- java.io.File buildFile = new java.io.File(modelDirectory+"/build.bin.gz");
- java.io.File checkFile = new java.io.File(modelDirectory+"/check.bin.gz");
- int iterations = 100;
- int cutoff = 5;
- if (args.length > argIndex) {
- iterations = Integer.parseInt(args[argIndex++]);
- cutoff = Integer.parseInt(args[argIndex++]);
- }
- // TODO: This option is missing in the current CLI tools,
- // and it is not thread safe ...
- if (fun) {
- Parse.useFunctionTags(true);
- }
-
- if (dict || all) {
- System.err.println("Building dictionary");
- ObjectStream<Parse> data = new ParseSampleStream(new PlainTextByLineStream(new FileReader(inFile)));
- Dictionary mdict = buildDictionary(data, rules, cutoff);
- System.out.println("Saving the dictionary");
- mdict.serialize(new FileOutputStream(dictFile));
- }
-
- if (tag || all) {
- System.err.println("Training tagger");
- ObjectStream<POSSample> tes = new PosSampleStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))));
- POSModel posModel = POSTaggerME.train("en", tes, ModelType.MAXENT, null, null, cutoff, 100);
- System.out.println("Saving the tagger model as: " + tagFile);
- OutputStream posOutputStream = new FileOutputStream(tagFile);
- posModel.serialize(posOutputStream);
- posOutputStream.close();
- }
-
- if (chunk || all) {
- System.err.println("Training chunker");
- ObjectStream<ChunkSample> ces = new ChunkSampleStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))));
- ChunkerModel chunkModel = ChunkerME.train("en", ces, cutoff, iterations,
- new ChunkContextGenerator());
- System.out.println("Saving the chunker model as: " + chunkFile);
- OutputStream chunkOutputStream = new FileOutputStream(chunkFile);
- chunkModel.serialize(chunkOutputStream);
- chunkOutputStream.close();
- }
-
- if (build || all) {
- System.err.println("Loading Dictionary");
- Dictionary tridict = new Dictionary(new FileInputStream(dictFile.toString()),true);
- System.err.println("Training builder");
- opennlp.model.EventStream bes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.BUILD,tridict);
- AbstractModel buildModel = train(bes, iterations, cutoff);
- System.out.println("Saving the build model as: " + buildFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(buildModel, buildFile).persist();
- }
-
- if (check || all) {
- System.err.println("Training checker");
- opennlp.model.EventStream kes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.CHECK);
- AbstractModel checkModel = train(kes, iterations, cutoff);
- System.out.println("Saving the check model as: " + checkFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(checkModel, checkFile).persist();
- }
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java Thu May 19 10:11:45 2011
@@ -540,119 +540,4 @@ public class Parser extends AbstractBott
public static AbstractModel train(opennlp.model.EventStream es, int iterations, int cut) throws java.io.IOException {
return opennlp.maxent.GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
}
-
- @Deprecated
- private static void usage() {
- System.err.println("Usage: ParserME -[dict|tag|chunk|build|attach|fun] trainingFile parserModelDirectory [iterations cutoff]");
- System.err.println();
- System.err.println("Training file should be one sentence per line where each line consists of a Penn Treebank Style parse");
- System.err.println("-tag Just build the tagging model.");
- System.err.println("-chunk Just build the chunking model.");
- System.err.println("-build Just build the build model");
- System.err.println("-attach Just build the attach model");
- System.err.println("-fun Predict function tags");
- }
-
- @Deprecated
- public static void main(String[] args) throws java.io.IOException {
- if (args.length < 3) {
- usage();
- System.exit(1);
- }
- boolean tag = false;
- boolean chunk = false;
- boolean build = false;
- boolean attach = false;
- boolean check = false;
- boolean fun = false;
- boolean all = true;
- int argIndex = 0;
- while (args[argIndex].startsWith("-")) {
- all = false;
- if (args[argIndex].equals("-tag")) {
- tag = true;
- }
- else if (args[argIndex].equals("-chunk")) {
- chunk = true;
- }
- else if (args[argIndex].equals("-build")) {
- build = true;
- }
- else if (args[argIndex].equals("-attach")) {
- attach = true;
- }
- else if (args[argIndex].equals("-check")) {
- check = true;
- }
- else if (args[argIndex].equals("-fun")) {
- fun = true;
- }
- else if (args[argIndex].equals("--")) {
- argIndex++;
- break;
- }
- else {
- System.err.println("Invalid option " + args[argIndex]);
- usage();
- System.exit(1);
- }
- argIndex++;
- }
- java.io.File inFile = new java.io.File(args[argIndex++]);
- String modelDirectory = args[argIndex++];
- HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(modelDirectory+"/head_rules");
- java.io.File tagFile = new java.io.File(modelDirectory+"/tag.bin.gz");
- java.io.File chunkFile = new java.io.File(modelDirectory+"/chunk.bin.gz");
- java.io.File buildFile = new java.io.File(modelDirectory+"/build.bin.gz");
- java.io.File attachFile = new java.io.File(modelDirectory+"/attach.bin.gz");
- java.io.File checkFile = new java.io.File(modelDirectory+"/check.bin.gz");
- int iterations = 100;
- int cutoff = 5;
- if (args.length > argIndex) {
- iterations = Integer.parseInt(args[argIndex++]);
- cutoff = Integer.parseInt(args[argIndex++]);
- }
- if (fun) {
- Parse.useFunctionTags(true);
- }
- if (tag || all) {
- System.err.println("Training tagger");
- opennlp.model.EventStream tes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.TAG);
- AbstractModel tagModel = train(tes, iterations, cutoff);
- System.out.println("Saving the tagger model as: " + tagFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(tagModel, tagFile).persist();
- }
-
- if (chunk || all) {
- System.err.println("Training chunker");
- opennlp.model.EventStream ces = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.CHUNK);
- AbstractModel chunkModel = train(ces, iterations, cutoff);
- System.out.println("Saving the chunker model as: " + chunkFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(chunkModel, chunkFile).persist();
- }
-
- if (build || all) {
- System.err.println("Training builder");
- opennlp.model.EventStream bes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.BUILD,null);
- AbstractModel buildModel = train(bes, iterations, cutoff);
- System.out.println("Saving the build model as: " + buildFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(buildModel, buildFile).persist();
- }
-
- if (attach || all) {
- System.err.println("Training attacher");
- opennlp.model.EventStream kes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.ATTACH);
- AbstractModel attachModel = train(kes, iterations, cutoff);
- System.out.println("Saving the attach model as: " + attachFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(attachModel, attachFile).persist();
- }
-
- if (check || all) {
- System.err.println("Training checker");
- opennlp.model.EventStream ces = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.CHECK);
- AbstractModel checkModel = train(ces, iterations, cutoff);
- System.out.println("Saving the check model as: " + checkFile);
- new opennlp.maxent.io.SuffixSensitiveGISModelWriter(checkModel, checkFile).persist();
- }
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java Thu May 19 10:11:45 2011
@@ -289,13 +289,4 @@ public class POSDictionary implements It
return newPosDict;
}
-
- public static void main(String[] args) throws IOException, InvalidFormatException {
- POSModel model = new POSModel(new FileInputStream(args[0]));
- POSDictionary dict = model.getTagDictionary();
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
- for (String line = in.readLine();line != null;line = in.readLine()) {
- System.out.println(Arrays.asList(dict.getTags(line)));
- }
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Thu May 19 10:11:45 2011
@@ -148,32 +148,4 @@ public final class POSModel extends Base
public Dictionary getNgramDictionary() {
return (Dictionary) artifactMap.get(NGRAM_DICTIONARY_ENTRY_NAME);
}
-
- public static void usage() {
- System.err.println("POSModel packageName modelName [tagDictionary] [ngramDictionary]");
- }
-
- @Deprecated
- public static void main(String[] args) throws IOException, InvalidFormatException {
- if (args.length == 0){
- usage();
- System.exit(1);
- }
- int ai=0;
- String packageName = args[ai++];
- String modelName = args[ai++];
- AbstractModel model = new GenericModelReader(new File(modelName)).getModel();
- POSDictionary tagDict = null;
- Dictionary ngramDict = null;
- if (ai < args.length) {
- String tagDictName = args[ai++];
- tagDict = new POSDictionary(tagDictName);
- if (ai < args.length) {
- String ngramName = args[ai++];
- ngramDict = new Dictionary(new FileInputStream(ngramName));
- }
- }
-
- new POSModel("en", model,tagDict,ngramDict).serialize(new FileOutputStream(new File(packageName)));
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java Thu May 19 10:11:45 2011
@@ -92,16 +92,4 @@ public class SDCrossValidator {
public FMeasure getFMeasure() {
return fmeasure;
}
-
- @Deprecated
- public static void main(String[] args) throws Exception {
-
- SDCrossValidator cv = new SDCrossValidator("en");
-
- cv.evaluate(new SentenceSampleStream(new PlainTextByLineStream(
- new FileInputStream("/home/joern/Infopaq/opennlp.data/en/eos/eos.all").getChannel(),
- "ISO-8859-1")), 10);
-
- System.out.println(cv.getFMeasure().toString());
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java Thu May 19 10:11:45 2011
@@ -316,84 +316,4 @@ public class SentenceDetectorME implemen
return new SentenceModel(languageCode, sentModel,
useTokenEnd, abbreviations, manifestInfoEntries);
}
-
- private static void usage() {
- System.err.println("Usage: SentenceDetectorME -encoding charset -lang language trainData modelName [cutoff iterations]");
- System.err.println("-encoding charset specifies the encoding which should be used ");
- System.err.println(" for reading and writing text.");
- System.err.println("-lang language specifies the language which ");
- System.err.println(" is being processed.");
- System.err.println("trainData specifies the name of the input training file");
- System.err.println(" to train the resulting model.");
- System.err.println("modelName specifies the resulting saved model after");
- System.err.println(" training.");
- System.exit(1);
- }
-
- /**
- * <p>Trains a new sentence detection model.</p>
- *
- * <p>Usage: opennlp.tools.sentdetect.SentenceDetectorME data_file new_model_name (iterations cutoff)?</p>
- *
- * @param args
- * @throws IOException
- */
- public static void main(String[] args) throws IOException {
- int ai=0;
- String encoding = null;
- String lang = null;
- if (args.length == 0) {
- usage();
- }
- while (args[ai].startsWith("-")) {
- if (args[ai].equals("-encoding")) {
- ai++;
- if (ai < args.length) {
- encoding = args[ai];
- ai++;
- }
- else {
- usage();
- }
- }
- else if (args[ai].equals("-lang")) {
- ai++;
- if (ai < args.length) {
- lang = args[ai];
- ai++;
- }
- else {
- usage();
- }
- }
- else {
- usage();
- }
- }
-
- File inFile = new File(args[ai++]);
- File outFile = new File(args[ai++]);
-
- try {
- if ((lang == null) || (encoding == null)) {
- usage();
- }
-
- SentenceModel model = train(lang, new SentenceSampleStream(new PlainTextByLineStream(
- new InputStreamReader(new FileInputStream(inFile), encoding))), true, null);
-
- // TODO: add support for iterations and cutoff settings
-
-// if (args.length > ai)
-// mod = train(es, Integer.parseInt(args[ai++]), Integer.parseInt(args[ai++]));
-// else
-// mod = train(es, 100, 5);
-
- System.out.println("Saving the model as: " + outFile);
- model.serialize(new FileOutputStream(outFile));
- }
- catch (Exception e) {
- e.printStackTrace();
- }
- }
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java?rev=1124657&r1=1124656&r2=1124657&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java Thu May 19 10:11:45 2011
@@ -96,63 +96,4 @@ public class TokenizerCrossValidator {
public FMeasure getFMeasure() {
return fmeasure;
}
-
- private static void usage() {
- System.err.println("Usage: TokenizerCrossValidator -encoding charset -lang language trainData");
- System.err.println("-encoding charset specifies the encoding which should be used ");
- System.err.println(" for reading and writing text.");
- System.err.println("-lang language specifies the language which ");
- System.err.println(" is being processed.");
- System.exit(1);
- }
-
- @Deprecated
- public static void main(String[] args) throws IOException, ObjectStreamException {
- int ai=0;
- String encoding = null;
- String lang = null;
- if (args.length != 5) {
- usage();
- }
-
- while (args[ai].startsWith("-")) {
- if (args[ai].equals("-encoding")) {
- ai++;
- if (ai < args.length) {
- encoding = args[ai];
- ai++;
- }
- else {
- usage();
- }
- }
- else if (args[ai].equals("-lang")) {
- ai++;
- if (ai < args.length) {
- lang = args[ai];
- ai++;
- }
- else {
- usage();
- }
- }
- else {
- usage();
- }
- }
-
- File trainingDataFile = new File(args[ai++]);
-
- FileInputStream trainingDataIn = new FileInputStream(trainingDataFile);
- ObjectStream<String> lineStream = new PlainTextByLineStream(trainingDataIn.getChannel(), encoding);
- ObjectStream<TokenSample> sampleStream = new TokenSampleStream(lineStream);
-
- TokenizerCrossValidator validator = new TokenizerCrossValidator(lang, false);
-
- validator.evaluate(sampleStream, 10);
-
- FMeasure result = validator.getFMeasure();
-
- System.out.println(result.toString());
- }
}