You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/05/02 14:34:33 UTC
svn commit: r1591889 [2/14] - in /opennlp/trunk: opennlp-tools/lang/ml/
opennlp-tools/src/main/java/opennlp/tools/chunker/
opennlp-tools/src/main/java/opennlp/tools/cmdline/
opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/
opennlp-tools/src/m...
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java Fri May 2 12:34:23 2014
@@ -69,26 +69,26 @@ import opennlp.tools.cmdline.tokenizer.T
import opennlp.tools.util.Version;
public final class CLI {
-
+
public static final String CMD = "opennlp";
-
+
private static Map<String, CmdLineTool> toolLookupMap;
-
+
static {
toolLookupMap = new LinkedHashMap<String, CmdLineTool>();
-
+
List<CmdLineTool> tools = new LinkedList<CmdLineTool>();
-
+
// Document Categorizer
tools.add(new DoccatTool());
tools.add(new DoccatTrainerTool());
tools.add(new DoccatEvaluatorTool());
tools.add(new DoccatCrossValidatorTool());
tools.add(new DoccatConverterTool());
-
+
// Dictionary Builder
tools.add(new DictionaryBuilderTool());
-
+
// Tokenizer
tools.add(new SimpleTokenizerTool());
tools.add(new TokenizerMETool());
@@ -97,14 +97,14 @@ public final class CLI {
tools.add(new TokenizerCrossValidatorTool());
tools.add(new TokenizerConverterTool());
tools.add(new DictionaryDetokenizerTool());
-
+
// Sentence detector
tools.add(new SentenceDetectorTool());
tools.add(new SentenceDetectorTrainerTool());
tools.add(new SentenceDetectorEvaluatorTool());
tools.add(new SentenceDetectorCrossValidatorTool());
tools.add(new SentenceDetectorConverterTool());
-
+
// Name Finder
tools.add(new TokenNameFinderTool());
tools.add(new TokenNameFinderTrainerTool());
@@ -112,22 +112,22 @@ public final class CLI {
tools.add(new TokenNameFinderCrossValidatorTool());
tools.add(new TokenNameFinderConverterTool());
tools.add(new CensusDictionaryCreatorTool());
-
-
+
+
// POS Tagger
tools.add(new opennlp.tools.cmdline.postag.POSTaggerTool());
tools.add(new POSTaggerTrainerTool());
tools.add(new POSTaggerEvaluatorTool());
tools.add(new POSTaggerCrossValidatorTool());
tools.add(new POSTaggerConverterTool());
-
+
// Chunker
tools.add(new ChunkerMETool());
tools.add(new ChunkerTrainerTool());
tools.add(new ChunkerEvaluatorTool());
tools.add(new ChunkerCrossValidatorTool());
tools.add(new ChunkerConverterTool());
-
+
// Parser
tools.add(new ParserTool());
tools.add(new ParserTrainerTool()); // trains everything
@@ -136,29 +136,29 @@ public final class CLI {
tools.add(new BuildModelUpdaterTool()); // re-trains build model
tools.add(new CheckModelUpdaterTool()); // re-trains build model
tools.add(new TaggerModelReplacerTool());
-
+
// Entity Linker
tools.add(new EntityLinkerTool());
-
+
for (CmdLineTool tool : tools) {
toolLookupMap.put(tool.getName(), tool);
}
-
+
toolLookupMap = Collections.unmodifiableMap(toolLookupMap);
}
-
+
/**
* @return a set which contains all tool names
*/
public static Set<String> getToolNames() {
return toolLookupMap.keySet();
}
-
+
private static void usage() {
System.out.print("OpenNLP " + Version.currentVersion().toString() + ". ");
System.out.println("Usage: " + CMD + " TOOL");
System.out.println("where TOOL is one of:");
-
+
// distance of tool name from line start
int numberOfSpaces = -1;
for (String toolName : toolLookupMap.keySet()) {
@@ -167,29 +167,29 @@ public final class CLI {
}
}
numberOfSpaces = numberOfSpaces + 4;
-
+
for (CmdLineTool tool : toolLookupMap.values()) {
-
+
System.out.print(" " + tool.getName());
-
+
for (int i = 0; i < Math.abs(tool.getName().length() - numberOfSpaces); i++) {
System.out.print(" ");
}
-
+
System.out.println(tool.getShortDescription());
}
-
+
System.out.println("All tools print help when invoked with help parameter");
System.out.println("Example: opennlp SimpleTokenizer help");
}
-
+
public static void main(String[] args) {
-
+
if (args.length == 0) {
usage();
System.exit(0);
}
-
+
String toolArguments[] = new String[args.length -1];
System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
@@ -203,7 +203,7 @@ public final class CLI {
toolName = toolName.substring(0, idx);
}
CmdLineTool tool = toolLookupMap.get(toolName);
-
+
try {
if (null == tool) {
throw new TerminateToolException(1, "Tool " + toolName + " is not found.");
@@ -233,7 +233,7 @@ public final class CLI {
}
}
catch (TerminateToolException e) {
-
+
if (e.getMessage() != null) {
System.err.println(e.getMessage());
}
@@ -242,7 +242,7 @@ public final class CLI {
System.err.println(e.getCause().getMessage());
e.getCause().printStackTrace(System.err);
}
-
+
System.exit(e.getCode());
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineTool.java Fri May 2 12:34:23 2014
@@ -63,7 +63,7 @@ public abstract class CmdLineTool {
* @return a description on how to use the tool
*/
public abstract String getHelp();
-
+
protected <T> T validateAndParseParams(String[] args, Class<T> argProxyInterface) {
String errorMessage = ArgumentParser.validateArgumentsLoudly(args, argProxyInterface);
if (null != errorMessage) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java Fri May 2 12:34:23 2014
@@ -310,11 +310,11 @@ public final class CmdLineUtil {
public static TerminateToolException createObjectStreamError(IOException e) {
return new TerminateToolException(-1, "IO Error while creating an Input Stream: " + e.getMessage(), e);
}
-
+
public static void handleCreateObjectStreamError(IOException e) {
throw createObjectStreamError(e);
}
-
+
// its optional, passing null is allowed
public static TrainingParameters loadTrainingParameters(String paramFile,
boolean supportSequenceTraining) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/DetailedFMeasureListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/DetailedFMeasureListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/DetailedFMeasureListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/DetailedFMeasureListener.java Fri May 2 12:34:23 2014
@@ -111,11 +111,11 @@ public abstract class DetailedFMeasureLi
+ "; recall: " + PERCENT + "; F1: " + PERCENT + ".";
private static final String FORMAT_EXTRA = FORMAT
+ " [target: %3d; tp: %3d; fp: %3d]";
-
+
public String createReport() {
return createReport(Locale.getDefault());
}
-
+
public String createReport(Locale locale) {
StringBuilder ret = new StringBuilder();
int tp = generalStats.getTruePositives();
@@ -222,7 +222,7 @@ public abstract class DetailedFMeasureLi
/**
* Retrieves the arithmetic mean of the precision scores calculated for each
* evaluated sample.
- *
+ *
* @return the arithmetic mean of all precision scores
*/
public double getPrecisionScore() {
@@ -234,7 +234,7 @@ public abstract class DetailedFMeasureLi
/**
* Retrieves the arithmetic mean of the recall score calculated for each
* evaluated sample.
- *
+ *
* @return the arithmetic mean of all recall scores
*/
public double getRecallScore() {
@@ -245,9 +245,9 @@ public abstract class DetailedFMeasureLi
/**
* Retrieves the f-measure score.
- *
+ *
* f-measure = 2 * precision * recall / (precision + recall)
- *
+ *
* @return the f-measure or -1 if precision + recall <= 0
*/
public double getFMeasure() {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java Fri May 2 12:34:23 2014
@@ -67,7 +67,7 @@ public abstract class EvaluationErrorPri
if (id != null) {
printStream.println("Id: {" + id + "}");
}
-
+
printSamples(referenceSample, predictedSample);
printErrors(falsePositives, falseNegatives, sentenceTokens);
@@ -79,7 +79,7 @@ public abstract class EvaluationErrorPri
T referenceSample, T predictedSample, String[] sentenceTokens) {
printError(null, references, predictions, referenceSample, predictedSample, sentenceTokens);
}
-
+
// for pos tagger
protected void printError(String references[], String predictions[],
T referenceSample, T predictedSample, String[] sentenceTokens) {
@@ -112,7 +112,7 @@ public abstract class EvaluationErrorPri
/**
* Auxiliary method to print tag errors
- *
+ *
* @param filteredDoc
* the document tokens which were tagged wrong
* @param filteredRefs
@@ -134,7 +134,7 @@ public abstract class EvaluationErrorPri
/**
* Auxiliary method to print span errors
- *
+ *
* @param falsePositives
* false positives span
* @param falseNegatives
@@ -157,7 +157,7 @@ public abstract class EvaluationErrorPri
/**
* Auxiliary method to print span errors
- *
+ *
* @param falsePositives
* false positives span
* @param falseNegatives
@@ -176,7 +176,7 @@ public abstract class EvaluationErrorPri
/**
* Auxiliary method to print spans
- *
+ *
* @param spans
* the span list
* @param toks
@@ -190,7 +190,7 @@ public abstract class EvaluationErrorPri
/**
* Auxiliary method to print expected and predicted samples.
- *
+ *
* @param referenceSample
* the reference sample
* @param predictedSample
@@ -205,7 +205,7 @@ public abstract class EvaluationErrorPri
/**
* Outputs falseNegatives and falsePositives spans from the references and
* predictions list.
- *
+ *
* @param references
* @param predictions
* @param falseNegatives
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java Fri May 2 12:34:23 2014
@@ -29,10 +29,10 @@ import java.io.InputStream;
class MarkableFileInputStream extends InputStream {
private FileInputStream in;
-
+
private long markedPosition = -1;
private IOException markException;
-
+
MarkableFileInputStream(File file) throws FileNotFoundException {
in = new FileInputStream(file);
}
@@ -45,22 +45,22 @@ class MarkableFileInputStream extends In
markedPosition = -1;
}
}
-
+
@Override
public boolean markSupported() {
return true;
}
-
+
private void throwMarkExceptionIfOccured() throws IOException {
if (markException != null) {
throw markException;
}
}
-
+
@Override
public synchronized void reset() throws IOException {
throwMarkExceptionIfOccured();
-
+
if (markedPosition >= 0) {
in.getChannel().position(markedPosition);
}
@@ -68,17 +68,17 @@ class MarkableFileInputStream extends In
throw new IOException("Stream has to be marked before it can be reset!");
}
}
-
+
@Override
public int read() throws IOException {
return in.read();
}
-
+
@Override
public int read(byte[] b) throws IOException {
return in.read(b);
}
-
+
@Override
public int read(byte[] b, int off, int len) throws IOException {
return in.read(b, off, len);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ModelLoader.java Fri May 2 12:34:23 2014
@@ -28,36 +28,36 @@ import opennlp.tools.util.InvalidFormatE
* Loads a model and does all the error handling for the command line tools.
* <p>
* <b>Note:</b> Do not use this class, internal use only!
- *
+ *
* @param <T>
*/
public abstract class ModelLoader<T> {
-
+
private final String modelName;
-
+
protected ModelLoader(String modelName) {
-
+
if (modelName == null)
throw new IllegalArgumentException("modelName must not be null!");
-
+
this.modelName = modelName;
}
-
+
protected abstract T loadModel(InputStream modelIn) throws
IOException, InvalidFormatException;
-
+
public T load(File modelFile) {
-
+
long beginModelLoadingTime = System.currentTimeMillis();
-
+
CmdLineUtil.checkInputFile(modelName + " model", modelFile);
System.err.print("Loading " + modelName + " model ... ");
-
+
InputStream modelIn = new BufferedInputStream(CmdLineUtil.openInFile(modelFile), CmdLineUtil.IO_BUFFER_SIZE);
-
+
T model;
-
+
try {
model = loadModel(modelIn);
}
@@ -70,7 +70,7 @@ public abstract class ModelLoader<T> {
throw new TerminateToolException(-1, "IO error while loading model file '" + modelFile + "'", e);
}
finally {
- // will not be null because openInFile would
+ // will not be null because openInFile would
// terminate in this case
try {
modelIn.close();
@@ -78,11 +78,11 @@ public abstract class ModelLoader<T> {
// sorry that this can fail
}
}
-
+
long modelLoadingDuration = System.currentTimeMillis() - beginModelLoadingTime;
-
+
System.err.printf("done (%.3fs)\n", modelLoadingDuration / 1000d);
-
+
return model;
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java Fri May 2 12:34:23 2014
@@ -27,10 +27,10 @@ public interface ObjectStreamFactory<T>
* @return interface with parameters description
*/
<P> Class<P> getParameters();
-
+
/**
* Creates the <code>ObjectStream</code>.
- *
+ *
* @param args arguments
* @return ObjectStream instance
*/
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java Fri May 2 12:34:23 2014
@@ -36,19 +36,19 @@ import java.util.concurrent.TimeUnit;
*/
public class PerformanceMonitor {
- private ScheduledExecutorService scheduler =
+ private ScheduledExecutorService scheduler =
Executors.newScheduledThreadPool(1);
private final String unit;
-
+
private ScheduledFuture<?> beeperHandle;
-
+
private volatile long startTime = -1;
-
+
private volatile int counter;
-
+
private final PrintStream out;
-
+
public PerformanceMonitor(PrintStream out, String unit) {
this.out = out;
this.unit = unit;
@@ -57,44 +57,44 @@ public class PerformanceMonitor {
public PerformanceMonitor(String unit) {
this(System.out, unit);
}
-
+
public boolean isStarted() {
return startTime != -1;
}
-
+
public void incrementCounter(int increment) {
-
+
if (!isStarted())
throw new IllegalStateException("Must be started first!");
-
- if (increment < 0)
+
+ if (increment < 0)
throw new IllegalArgumentException("increment must be zero or positive but was " + increment + "!");
-
+
counter += increment;
}
-
+
public void incrementCounter() {
incrementCounter(1);
}
-
+
public void start() {
-
- if (isStarted())
+
+ if (isStarted())
throw new IllegalStateException("Already started!");
-
+
startTime = System.currentTimeMillis();
}
-
-
+
+
public void startAndPrintThroughput() {
-
+
start();
-
+
final Runnable beeper = new Runnable() {
-
+
private long lastTimeStamp = startTime;
private int lastCount = counter;
-
+
public void run() {
int deltaCount = counter - lastCount;
@@ -111,7 +111,7 @@ public class PerformanceMonitor {
}
long totalTimePassed = System.currentTimeMillis() - startTime;
-
+
double averageThroughput;
if (totalTimePassed > 0) {
averageThroughput = counter / (((double) totalTimePassed) / 1000);
@@ -119,33 +119,33 @@ public class PerformanceMonitor {
else {
averageThroughput = 0;
}
-
+
out.printf("current: %.1f " + unit + "/s avg: %.1f " + unit + "/s total: %d " + unit + "%n", currentThroughput,
averageThroughput, counter);
lastTimeStamp = System.currentTimeMillis();
lastCount = counter;
}
- };
-
+ };
+
beeperHandle = scheduler.scheduleAtFixedRate(beeper, 1, 1, TimeUnit.SECONDS);
}
-
+
public void stopAndPrintFinalResult() {
-
+
if (!isStarted())
throw new IllegalStateException("Must be started first!");
-
+
if (beeperHandle != null) {
// yeah we have time to finish current
// printing if there is one
beeperHandle.cancel(false);
}
-
+
scheduler.shutdown();
-
+
long timePassed = System.currentTimeMillis() - startTime;
-
+
double average;
if (timePassed > 0) {
average = counter / (timePassed / 1000d);
@@ -153,10 +153,10 @@ public class PerformanceMonitor {
else {
average = 0;
}
-
+
out.println();
out.println();
-
+
out.printf("Average: %.1f " + unit +"/s %n", average);
out.println("Total: " + counter + " " + unit);
out.println("Runtime: " + timePassed / 1000d + "s");
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java Fri May 2 12:34:23 2014
@@ -70,21 +70,21 @@ public final class StreamFactoryRegistry
SentenceSampleStreamFactory.registerFactory();
TokenSampleStreamFactory.registerFactory();
WordTagSampleStreamFactory.registerFactory();
-
+
NameToSentenceSampleStreamFactory.registerFactory();
NameToTokenSampleStreamFactory.registerFactory();
-
+
POSToSentenceSampleStreamFactory.registerFactory();
POSToTokenSampleStreamFactory.registerFactory();
ParseToPOSSampleStreamFactory.registerFactory();
ParseToSentenceSampleStreamFactory.registerFactory();
ParseToTokenSampleStreamFactory.registerFactory();
-
+
OntoNotesNameSampleStreamFactory.registerFactory();
OntoNotesParseSampleStreamFactory.registerFactory();
OntoNotesPOSSampleStreamFactory.registerFactory();
-
+
BioNLP2004NameSampleStreamFactory.registerFactory();
Conll02NameSampleStreamFactory.registerFactory();
Conll03NameSampleStreamFactory.registerFactory();
@@ -98,11 +98,11 @@ public final class StreamFactoryRegistry
ADSentenceSampleStreamFactory.registerFactory();
ADPOSSampleStreamFactory.registerFactory();
ADTokenSampleStreamFactory.registerFactory();
-
+
Muc6NameSampleStreamFactory.registerFactory();
-
+
ConstitParseSampleStreamFactory.registerFactory();
-
+
BratNameSampleStreamFactory.registerFactory();
}
@@ -180,20 +180,20 @@ public final class StreamFactoryRegistry
if (null == formatName) {
formatName = DEFAULT_FORMAT;
}
-
+
ObjectStreamFactory<T> factory = registry.containsKey(sampleClass) ?
registry.get(sampleClass).get(formatName) : null;
-
+
if (factory != null) {
return factory;
}
else {
try {
Class<?> factoryClazz = Class.forName(formatName);
-
+
// TODO: Need to check if it can produce the desired output
// Otherwise there will be class cast exceptions later in the flow
-
+
try {
return (ObjectStreamFactory<T>) factoryClazz.newInstance();
} catch (InstantiationException e) {
@@ -201,7 +201,7 @@ public final class StreamFactoryRegistry
} catch (IllegalAccessException e) {
return null;
}
-
+
} catch (ClassNotFoundException e) {
return null;
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java Fri May 2 12:34:23 2014
@@ -26,14 +26,14 @@ import opennlp.tools.util.InputStreamFac
public class SystemInputStreamFactory implements InputStreamFactory {
private boolean isTainted = false;
-
+
public static Charset encoding() {
return Charset.defaultCharset();
}
-
+
@Override
public InputStream createInputStream() throws IOException {
-
+
if (!isTainted) {
isTainted = true;
return System.in;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TerminateToolException.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TerminateToolException.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TerminateToolException.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TerminateToolException.java Fri May 2 12:34:23 2014
@@ -33,10 +33,10 @@ package opennlp.tools.cmdline;
public class TerminateToolException extends RuntimeException {
private static final long serialVersionUID = 1L;
-
+
private final int code;
private final String message;
-
+
public TerminateToolException(int code, String message, Throwable t) {
super(t);
this.code = code;
@@ -47,15 +47,15 @@ public class TerminateToolException exte
this.code = code;
this.message = message;
}
-
+
public TerminateToolException(int code) {
this(code, null);
}
-
+
public int getCode() {
return code;
}
-
+
@Override
public String getMessage() {
return message;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java Fri May 2 12:34:23 2014
@@ -109,7 +109,7 @@ public abstract class TypedCmdLineTool<T
public String getHelp() {
return getHelp("");
}
-
+
/**
* Executes the tool with the given parameters.
*
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkEvaluationErrorListener.java Fri May 2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
/**
* A default implementation of {@link EvaluationMonitor} that prints
* to an output stream.
- *
+ *
*/
public class ChunkEvaluationErrorListener extends
EvaluationErrorPrinter<ChunkSample> implements ChunkerEvaluationMonitor {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java Fri May 2 12:34:23 2014
@@ -37,7 +37,7 @@ import opennlp.tools.util.model.ModelUti
public final class ChunkerCrossValidatorTool
extends AbstractCrossValidatorTool<ChunkSample, CVToolParams> {
-
+
interface CVToolParams extends TrainingParams, CVParams, DetailedFMeasureEvaluatorParams {
}
@@ -48,7 +48,7 @@ public final class ChunkerCrossValidator
public String getShortDescription() {
return "K-fold cross validator for the chunker";
}
-
+
public void run(String format, String[] args) {
super.run(format, args);
@@ -89,7 +89,7 @@ public final class ChunkerCrossValidator
// sorry that this can fail
}
}
-
+
if (detailedFMeasureListener == null) {
FMeasure result = validator.getFMeasure();
System.out.println(result.toString());
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java Fri May 2 12:34:23 2014
@@ -37,7 +37,7 @@ import opennlp.tools.util.eval.Evaluatio
public final class ChunkerEvaluatorTool
extends AbstractEvaluatorTool<ChunkSample, EvalToolParams> {
-
+
interface EvalToolParams extends EvaluatorParams, DetailedFMeasureEvaluatorParams {
}
@@ -53,7 +53,7 @@ public final class ChunkerEvaluatorTool
super.run(format, args);
ChunkerModel model = new ChunkerModelLoader().load(params.getModel());
-
+
List<EvaluationMonitor<ChunkSample>> listeners = new LinkedList<EvaluationMonitor<ChunkSample>>();
ChunkerDetailedFMeasureListener detailedFMeasureListener = null;
if(params.getMisclassified()) {
@@ -67,7 +67,7 @@ public final class ChunkerEvaluatorTool
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model,
ChunkerME.DEFAULT_BEAM_SIZE),
listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
-
+
final PerformanceMonitor monitor = new PerformanceMonitor("sent");
ObjectStream<ChunkSample> measuredSampleStream = new ObjectStream<ChunkSample>() {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerModelLoader.java Fri May 2 12:34:23 2014
@@ -33,7 +33,7 @@ public class ChunkerModelLoader extends
public ChunkerModelLoader() {
super("Chunker");
}
-
+
@Override
protected ChunkerModel loadModel(InputStream modelIn) throws IOException {
return new ChunkerModel(modelIn);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Fri May 2 12:34:23 2014
@@ -33,7 +33,7 @@ import opennlp.tools.util.model.ModelUti
public class ChunkerTrainerTool
extends AbstractTrainerTool<ChunkSample, TrainerToolParams> {
-
+
interface TrainerToolParams extends TrainingParams, TrainingToolParams {
}
@@ -77,7 +77,7 @@ public class ChunkerTrainerTool
// sorry that this can fail
}
}
-
+
CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java Fri May 2 12:34:23 2014
@@ -23,13 +23,13 @@ import opennlp.tools.cmdline.params.Basi
/**
* TrainingParams for Chunker.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
-
+
@ParameterDescription(valueName = "factoryName", description = "A sub-class of ChunkerFactory where to get implementation and resources.")
@OptionalParameter
String getFactory();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java Fri May 2 12:34:23 2014
@@ -24,7 +24,7 @@ import opennlp.tools.cmdline.params.Enco
/**
* Params for Dictionary tools.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface DictionaryBuilderParams extends EncodingParameter {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatEvaluationErrorListener.java Fri May 2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
/**
* A default implementation of {@link EvaluationMonitor} that prints to an
* output stream.
- *
+ *
*/
public class DoccatEvaluationErrorListener extends
EvaluationErrorPrinter<DocumentSample> implements DoccatEvaluationMonitor {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatFineGrainedReportListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatFineGrainedReportListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatFineGrainedReportListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatFineGrainedReportListener.java Fri May 2 12:34:23 2014
@@ -45,7 +45,7 @@ import opennlp.tools.util.eval.Mean;
* <p>
* It is possible to use it from an API and access the statistics using the
* provided getters
- *
+ *
*/
public class DoccatFineGrainedReportListener implements DoccatEvaluationMonitor {
@@ -396,7 +396,7 @@ public class DoccatFineGrainedReportList
/**
* Includes a new evaluation data
- *
+ *
* @param tok
* the evaluated token
* @param ref
@@ -703,7 +703,7 @@ public class DoccatFineGrainedReportList
/**
* Creates a new {@link ConfusionMatrixLine}
- *
+ *
* @param ref
* the reference column
*/
@@ -713,7 +713,7 @@ public class DoccatFineGrainedReportList
/**
* Increments the counter for the given column and updates the statistics.
- *
+ *
* @param column
* the column to be incremented
*/
@@ -729,7 +729,7 @@ public class DoccatFineGrainedReportList
/**
* Gets the calculated accuracy of this element
- *
+ *
* @return the accuracy
*/
public double getAccuracy() {
@@ -744,7 +744,7 @@ public class DoccatFineGrainedReportList
/**
* Gets the value given a column
- *
+ *
* @param column
* the column
* @return the counter value
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatModelLoader.java Fri May 2 12:34:23 2014
@@ -33,7 +33,7 @@ public class DoccatModelLoader extends M
public DoccatModelLoader() {
super("Document Categorizer");
}
-
+
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
return new DoccatModel(modelIn);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java Fri May 2 12:34:23 2014
@@ -42,10 +42,10 @@ public class EntityLinkerTool extends Ba
public String getShortDescription() {
return "links an entity to an external data set";
}
-
+
@Override
public void run(String[] args) {
-
+
if (0 == args.length) {
System.out.println(getHelp());
}
@@ -53,10 +53,10 @@ public class EntityLinkerTool extends Ba
// TODO: Ask Mark if we can remove the type, the user knows upfront if he tries
// to link place names or company mentions ...
String entityType = "location";
-
+
// Load the properties, they should contain everything that is necessary to instantiate
// the component
-
+
// TODO: Entity Linker Properties constructor should not duplicate code
EntityLinkerProperties properties;
try {
@@ -65,9 +65,9 @@ public class EntityLinkerTool extends Ba
catch (IOException e) {
throw new TerminateToolException(-1, "Failed to load the properties file!");
}
-
+
// TODO: It should not just throw Exception.
-
+
EntityLinker entityLinker;
try {
entityLinker = EntityLinkerFactory.getLinker(entityType, properties);
@@ -75,36 +75,36 @@ public class EntityLinkerTool extends Ba
catch (Exception e) {
throw new TerminateToolException(-1, "Failed to instantiate the Entity Linker: " + e.getMessage());
}
-
+
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
perfMon.start();
-
+
try {
-
+
ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
-
+
List<NameSample> document = new ArrayList<NameSample>();
-
+
String line;
while ((line = untokenizedLineStream.read()) != null) {
if (line.trim().isEmpty()) {
// Run entity linker ... and output result ...
-
+
StringBuilder text = new StringBuilder();
Span sentences[] = new Span[document.size()];
List<Span> tokens = new ArrayList<Span>();
List<Span> names = new ArrayList<Span>();
-
+
for (int i = 0; i < document.size(); i++) {
-
+
NameSample sample = document.get(i);
-
+
int sentenceBegin = text.length();
-
+
int tokenSentOffset = tokens.size();
-
+
// for all tokens
for (String token : sample.getSentence()) {
int tokenBegin = text.length();
@@ -112,22 +112,22 @@ public class EntityLinkerTool extends Ba
Span tokenSpan = new Span(tokenBegin, text.length());
text.append(" ");
}
-
+
for (Span name : sample.getNames()) {
names.add(new Span(tokenSentOffset + name.getStart(), tokenSentOffset + name.getEnd(), name.getType()));
}
-
+
sentences[i] = new Span(sentenceBegin, text.length());
text.append("\n");
}
-
+
List<Span> linkedSpans = entityLinker.find(text.toString(), sentences, tokens.toArray(new Span[tokens.size()]),
names.toArray(new Span[names.size()]));
-
+
for (int i = 0; i < linkedSpans.size(); i++) {
System.out.println(linkedSpans.get(i));
}
-
+
perfMon.incrementCounter(document.size());
document.clear();
}
@@ -139,7 +139,7 @@ public class EntityLinkerTool extends Ba
catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
-
+
perfMon.stopAndPrintFinalResult();
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java Fri May 2 12:34:23 2014
@@ -48,18 +48,18 @@ public class CensusDictionaryCreatorTool
* Create a list of expected parameters.
*/
interface Parameters {
-
+
@ParameterDescription(valueName = "code")
@OptionalParameter(defaultValue = "en")
String getLang();
-
+
@ParameterDescription(valueName = "charsetName")
@OptionalParameter(defaultValue="UTF-8")
String getEncoding();
-
+
@ParameterDescription(valueName = "censusDict")
String getCensusData();
-
+
@ParameterDescription(valueName = "dict")
String getDict();
}
@@ -107,9 +107,9 @@ public class CensusDictionaryCreatorTool
CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile);
FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
- ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn,
+ ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn,
Charset.forName(params.getEncoding()));
-
+
Dictionary mDictionary;
try {
System.out.println("Creating Dictionary...");
@@ -126,9 +126,9 @@ public class CensusDictionaryCreatorTool
}
System.out.println("Saving Dictionary...");
-
+
OutputStream out = null;
-
+
try {
out = new FileOutputStream(dictOutFile);
mDictionary.serialize(out);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameEvaluationErrorListener.java Fri May 2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
/**
* A default implementation of {@link EvaluationMonitor} that prints
* to an output stream.
- *
+ *
*/
public class NameEvaluationErrorListener extends
EvaluationErrorPrinter<NameSample> implements TokenNameFinderEvaluationMonitor {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Fri May 2 12:34:23 2014
@@ -43,7 +43,7 @@ import opennlp.tools.util.model.ModelUti
public final class TokenNameFinderCrossValidatorTool
extends AbstractCrossValidatorTool<NameSample, CVToolParams> {
-
+
interface CVToolParams extends TrainingParams, CVParams, DetailedFMeasureEvaluatorParams {
}
@@ -73,7 +73,7 @@ public final class TokenNameFinderCrossV
String nameTypes[] = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
-
+
List<EvaluationMonitor<NameSample>> listeners = new LinkedList<EvaluationMonitor<NameSample>>();
if (params.getMisclassified()) {
listeners.add(new NameEvaluationErrorListener());
@@ -85,16 +85,16 @@ public final class TokenNameFinderCrossV
}
String sequenceCodecImplName = params.getSequenceCodec();
-
+
if ("BIO".equals(sequenceCodecImplName)) {
sequenceCodecImplName = BioCodec.class.getName();
}
else if ("BILOU".equals(sequenceCodecImplName)) {
sequenceCodecImplName = BilouCodec.class.getName();
}
-
+
SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
-
+
TokenNameFinderFactory nameFinderFactory = null;
try {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
@@ -102,7 +102,7 @@ public final class TokenNameFinderCrossV
} catch (InvalidFormatException e) {
throw new TerminateToolException(-1, e.getMessage(), e);
}
-
+
TokenNameFinderCrossValidator validator;
try {
validator = new TokenNameFinderCrossValidator(params.getLang(),
@@ -123,7 +123,7 @@ public final class TokenNameFinderCrossV
System.out.println("done");
System.out.println();
-
+
if(detailedFListener == null) {
System.out.println(validator.getFMeasure());
} else {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java Fri May 2 12:34:23 2014
@@ -59,7 +59,7 @@ public final class TokenNameFinderEvalua
super.run(format, args);
TokenNameFinderModel model = new TokenNameFinderModelLoader().load(params.getModel());
-
+
List<EvaluationMonitor<NameSample>> listeners = new LinkedList<EvaluationMonitor<NameSample>>();
if (params.getMisclassified()) {
listeners.add(new NameEvaluationErrorListener());
@@ -74,7 +74,7 @@ public final class TokenNameFinderEvalua
String nameTypes[] = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
-
+
TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
new NameFinderME(model),
listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderModelLoader.java Fri May 2 12:34:23 2014
@@ -34,7 +34,7 @@ final public class TokenNameFinderModelL
public TokenNameFinderModelLoader() {
super("Token Name Finder");
}
-
+
@Override
protected TokenNameFinderModel loadModel(InputStream modelIn)
throws IOException, InvalidFormatException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Fri May 2 12:34:23 2014
@@ -43,7 +43,7 @@ import opennlp.tools.util.model.ModelUti
public final class TokenNameFinderTrainerTool
extends AbstractTrainerTool<NameSample, TrainerToolParams> {
-
+
interface TrainerToolParams extends TrainingParams, TrainingToolParams {
}
@@ -55,14 +55,14 @@ public final class TokenNameFinderTraine
public String getShortDescription() {
return "trainer for the learnable name finder";
}
-
+
static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) {
if(featureGenDescriptorFile != null) {
return openFeatureGeneratorBytes(new File(featureGenDescriptorFile));
}
return null;
}
-
+
static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
byte featureGeneratorBytes[] = null;
// load descriptor file into memory
@@ -84,7 +84,7 @@ public final class TokenNameFinderTraine
}
return featureGeneratorBytes;
}
-
+
public static Map<String, Object> loadResources(File resourcePath, File featureGenDescriptor) {
Map<String, Object> resources = new HashMap<String, Object>();
@@ -93,12 +93,12 @@ public final class TokenNameFinderTraine
Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel
.createArtifactSerializers();
-
- // TODO: If there is descriptor file, it should be consulted too
+
+ // TODO: If there is descriptor file, it should be consulted too
if (featureGenDescriptor != null) {
-
+
InputStream xmlDescriptorIn = null;
-
+
try {
artifactSerializers.putAll(GeneratorFactory.extractCustomArtifactSerializerMappings(xmlDescriptorIn));
} catch (IOException e) {
@@ -106,7 +106,7 @@ public final class TokenNameFinderTraine
e.printStackTrace();
}
}
-
+
File resourceFiles[] = resourcePath.listFiles();
// TODO: Filter files, also files with start with a dot
@@ -153,18 +153,18 @@ public final class TokenNameFinderTraine
}
return resources;
}
-
+
static Map<String, Object> loadResources(String resourceDirectory, File featureGeneratorDescriptor) {
if (resourceDirectory != null) {
File resourcePath = new File(resourceDirectory);
-
+
return loadResources(resourcePath, featureGeneratorDescriptor);
}
return new HashMap<String, Object>();
}
-
+
public void run(String format, String[] args) {
super.run(format, args);
@@ -176,32 +176,32 @@ public final class TokenNameFinderTraine
File modelOutFile = params.getModel();
byte featureGeneratorBytes[] = openFeatureGeneratorBytes(params.getFeaturegen());
-
+
// TODO: Support Custom resources:
- // Must be loaded into memory, or written to tmp file until descriptor
+ // Must be loaded into memory, or written to tmp file until descriptor
// is loaded which defines parses when model is loaded
-
+
Map<String, Object> resources = loadResources(params.getResources(), params.getFeaturegen());
-
+
CmdLineUtil.checkOutputFile("name finder model", modelOutFile);
if (params.getNameTypes() != null) {
String nameTypes[] = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
-
+
String sequenceCodecImplName = params.getSequenceCodec();
-
+
if ("BIO".equals(sequenceCodecImplName)) {
sequenceCodecImplName = BioCodec.class.getName();
}
else if ("BILOU".equals(sequenceCodecImplName)) {
sequenceCodecImplName = BilouCodec.class.getName();
}
-
+
SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
-
+
TokenNameFinderFactory nameFinderFactory = null;
try {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
@@ -209,7 +209,7 @@ public final class TokenNameFinderTraine
} catch (InvalidFormatException e) {
throw new TerminateToolException(-1, e.getMessage(), e);
}
-
+
TokenNameFinderModel model;
try {
model = opennlp.tools.namefind.NameFinderME.train(
@@ -227,7 +227,7 @@ public final class TokenNameFinderTraine
// sorry that this can fail
}
}
-
+
CmdLineUtil.writeModel("name finder", modelOutFile, model);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java Fri May 2 12:34:23 2014
@@ -25,31 +25,31 @@ import opennlp.tools.cmdline.params.Basi
/**
* TrainingParameters for Name Finder.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
-
+
@ParameterDescription(valueName = "modelType", description = "The type of the token name finder model")
@OptionalParameter(defaultValue = "default")
String getType();
-
+
@ParameterDescription(valueName = "resourcesDir", description = "The resources directory")
@OptionalParameter
File getResources();
-
+
@ParameterDescription(valueName = "featuregenFile", description = "The feature generator descriptor file")
@OptionalParameter
- File getFeaturegen();
-
+ File getFeaturegen();
+
@OptionalParameter
@ParameterDescription(valueName = "types", description = "name types to use for training")
String getNameTypes();
-
+
@OptionalParameter(defaultValue = "opennlp.tools.namefind.BioCodec")
@ParameterDescription(valueName = "codec", description = "sequence codec used to code name spans")
String getSequenceCodec();
-
+
@ParameterDescription(valueName = "factoryName", description = "A sub-class of TokenNameFinderFactory")
@OptionalParameter
String getFactory();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicTrainingParams.java Fri May 2 12:34:23 2014
@@ -22,11 +22,11 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* Common training parameters.
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface BasicTrainingParams extends LanguageParams {
-
+
@ParameterDescription(valueName = "paramsFile", description = "training parameters file.")
@OptionalParameter()
String getParams();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java Fri May 2 12:34:23 2014
@@ -22,18 +22,18 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* Common cross validator parameters.
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface CVParams {
-
+
@ParameterDescription(valueName = "true|false",
description = "if true will print false negatives and false positives.")
@OptionalParameter(defaultValue="false")
Boolean getMisclassified();
-
+
@ParameterDescription(valueName = "num", description = "number of folds, default is 10.")
@OptionalParameter(defaultValue="10")
Integer getFolds();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java Fri May 2 12:34:23 2014
@@ -23,14 +23,14 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* EvaluatorParams for Chunker.
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface DetailedFMeasureEvaluatorParams {
-
+
@ParameterDescription(valueName = "true|false",
description = "if true will print detailed FMeasure results.")
@OptionalParameter(defaultValue="false")
Boolean getDetailedF();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EncodingParameter.java Fri May 2 12:34:23 2014
@@ -24,7 +24,7 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* Encoding parameter. The DEFAULT_CHARSET is handled by ArgumentParser.Parse().
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface EncodingParameter {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java Fri May 2 12:34:23 2014
@@ -24,17 +24,17 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* Common evaluation parameters.
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface EvaluatorParams {
-
+
@ParameterDescription(valueName = "model", description = "the model file to be evaluated.")
File getModel();
-
+
@ParameterDescription(valueName = "true|false",
description = "if true will print false negatives and false positives.")
@OptionalParameter(defaultValue="false")
Boolean getMisclassified();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageParams.java Fri May 2 12:34:23 2014
@@ -23,5 +23,5 @@ public interface LanguageParams {
@ParameterDescription(valueName = "language", description = "language which is being processed.")
String getLang();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/TrainingToolParams.java Fri May 2 12:34:23 2014
@@ -23,11 +23,11 @@ import opennlp.tools.cmdline.ArgumentPar
/**
* Common training parameters.
- *
+ *
* Note: Do not use this class, internal use only!
*/
public interface TrainingToolParams extends BasicTrainingParams {
-
+
@ParameterDescription(valueName = "modelFile", description = "output model file.")
File getModel();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java Fri May 2 12:34:23 2014
@@ -34,26 +34,26 @@ public final class BuildModelUpdaterTool
public String getShortDescription() {
return "trains and updates the build model in a parser model";
}
-
+
@Override
protected ParserModel trainAndUpdate(ParserModel originalModel,
ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters)
throws IOException {
-
+
Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5);
-
+
parseSamples.reset();
-
+
// TODO: training individual models should be in the chunking parser, not here
// Training build
System.out.println("Training builder");
- ObjectStream<Event> bes = new ParserEventStream(parseSamples,
+ ObjectStream<Event> bes = new ParserEventStream(parseSamples,
originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict);
- AbstractModel buildModel = Parser.train(bes,
+ AbstractModel buildModel = Parser.train(bes,
100, 5);
-
+
parseSamples.close();
-
+
return originalModel.updateBuildModel(buildModel);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java Fri May 2 12:34:23 2014
@@ -35,26 +35,26 @@ public final class CheckModelUpdaterTool
public String getShortDescription() {
return "trains and updates the check model in a parser model";
}
-
+
@Override
protected ParserModel trainAndUpdate(ParserModel originalModel,
ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters)
throws IOException {
-
+
Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5);
-
+
parseSamples.reset();
-
+
// TODO: Maybe that should be part of the ChunkingParser ...
// Training build
System.out.println("Training check model");
- ObjectStream<Event> bes = new ParserEventStream(parseSamples,
+ ObjectStream<Event> bes = new ParserEventStream(parseSamples,
originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict);
- AbstractModel checkModel = Parser.train(bes,
+ AbstractModel checkModel = Parser.train(bes,
100, 5);
-
+
parseSamples.close();
-
+
return originalModel.updateCheckModel(checkModel);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ModelUpdaterTool.java Fri May 2 12:34:23 2014
@@ -30,12 +30,12 @@ import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.util.ObjectStream;
-/**
+/**
* Abstract base class for tools which update the parser model.
*/
abstract class ModelUpdaterTool
extends AbstractTypedParamTool<Parse, ModelUpdaterTool.ModelUpdaterParams> {
-
+
interface ModelUpdaterParams extends TrainingToolParams {
}
@@ -50,7 +50,7 @@ abstract class ModelUpdaterTool
public final void run(String format, String[] args) {
ModelUpdaterParams params = validateAndParseParams(
ArgumentParser.filter(args, ModelUpdaterParams.class), ModelUpdaterParams.class);
-
+
// Load model to be updated
File modelFile = params.getModel();
ParserModel originalParserModel = new ParserModelLoader().load(modelFile);
@@ -59,7 +59,7 @@ abstract class ModelUpdaterTool
String[] fargs = ArgumentParser.filter(args, factory.getParameters());
validateFactoryArgs(factory, fargs);
ObjectStream<Parse> sampleStream = factory.create(fargs);
-
+
ParserModel updatedParserModel;
try {
updatedParserModel = trainAndUpdate(originalParserModel, sampleStream, params);
@@ -75,7 +75,7 @@ abstract class ModelUpdaterTool
// sorry that this can fail
}
}
-
+
CmdLineUtil.writeModel("parser", modelFile, updatedParserModel);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserEvaluatorTool.java Fri May 2 12:34:23 2014
@@ -33,18 +33,18 @@ public class ParserEvaluatorTool extends
public ParserEvaluatorTool() {
super(Parse.class, EvaluatorParams.class);
}
-
+
@Override
public void run(String format, String[] args) {
-
+
super.run(format, args);
-
+
ParserModel model = new ParserModelLoader().load(params.getModel());
-
+
Parser parser = ParserFactory.create(model);
-
+
ParserEvaluator evaluator = new ParserEvaluator(parser);
-
+
System.out.print("Evaluating ... ");
try {
evaluator.evaluate(sampleStream);
@@ -60,7 +60,7 @@ public class ParserEvaluatorTool extends
}
}
System.out.println("done");
-
+
System.out.println();
System.out.println(evaluator.getFMeasure());
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserModelLoader.java Fri May 2 12:34:23 2014
@@ -34,7 +34,7 @@ public final class ParserModelLoader ext
public ParserModelLoader() {
super("Parser");
}
-
+
@Override
protected ParserModel loadModel(InputStream modelIn) throws IOException,
InvalidFormatException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java Fri May 2 12:34:23 2014
@@ -41,7 +41,7 @@ import opennlp.tools.util.model.Artifact
import opennlp.tools.util.model.ModelUtil;
public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerToolParams> {
-
+
interface TrainerToolParams extends TrainingParams, TrainingToolParams, EncodingParameter {
}
@@ -52,10 +52,10 @@ public final class ParserTrainerTool ext
public String getShortDescription() {
return "trains the learnable parser";
}
-
+
static Dictionary buildDictionary(ObjectStream<Parse> parseSamples, HeadRules headRules, int cutoff) {
System.err.print("Building dictionary ...");
-
+
Dictionary mdict;
try {
mdict = Parser.
@@ -65,10 +65,10 @@ public final class ParserTrainerTool ext
mdict = null;
}
System.err.println("done");
-
+
return mdict;
}
-
+
static ParserType parseParserType(String typeAsString) {
ParserType type = null;
if(typeAsString != null && typeAsString.length() > 0) {
@@ -78,16 +78,16 @@ public final class ParserTrainerTool ext
"' is invalid!");
}
}
-
+
return type;
}
-
+
static HeadRules creaeHeadRules(TrainerToolParams params) throws IOException {
-
+
ArtifactSerializer headRulesSerializer = null;
-
+
if (params.getHeadRulesSerializerImpl() != null) {
- headRulesSerializer = ExtensionLoader.instantiateExtension(ArtifactSerializer.class,
+ headRulesSerializer = ExtensionLoader.instantiateExtension(ArtifactSerializer.class,
params.getHeadRulesSerializerImpl());
}
else {
@@ -102,9 +102,9 @@ public final class ParserTrainerTool ext
headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
}
}
-
+
Object headRulesObject = headRulesSerializer.create(new FileInputStream(params.getHeadRules()));
-
+
if (headRulesObject instanceof HeadRules) {
return (HeadRules) headRulesObject;
}
@@ -112,30 +112,30 @@ public final class ParserTrainerTool ext
throw new TerminateToolException(-1, "HeadRules Artifact Serializer must create an object of type HeadRules!");
}
}
-
+
// TODO: Add param to train tree insert parser
public void run(String format, String[] args) {
super.run(format, args);
mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
-
+
if (mlParams != null) {
if (!TrainerFactory.isValid(mlParams.getSettings("build"))) {
throw new TerminateToolException(1, "Build training parameters are invalid!");
}
-
+
if (!TrainerFactory.isValid(mlParams.getSettings("check"))) {
throw new TerminateToolException(1, "Check training parameters are invalid!");
}
-
+
if (!TrainerFactory.isValid(mlParams.getSettings("attach"))) {
throw new TerminateToolException(1, "Attach training parameters are invalid!");
}
-
+
if (!TrainerFactory.isValid(mlParams.getSettings("tagger"))) {
throw new TerminateToolException(1, "Tagger training parameters are invalid!");
}
-
+
if (!TrainerFactory.isValid(mlParams.getSettings("chunker"))) {
throw new TerminateToolException(1, "Chunker training parameters are invalid!");
}
@@ -147,16 +147,16 @@ public final class ParserTrainerTool ext
File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("parser model", modelOutFile);
-
+
ParserModel model;
try {
HeadRules rules = creaeHeadRules(params);
-
+
ParserType type = parseParserType(params.getParserType());
if(params.getFun()){
Parse.useFunctionTags(true);
}
-
+
if (ParserType.CHUNKING.equals(type)) {
model = opennlp.tools.parser.chunking.Parser.train(
params.getLang(), sampleStream, rules,
@@ -181,7 +181,7 @@ public final class ParserTrainerTool ext
// sorry that this can fail
}
}
-
+
CmdLineUtil.writeModel("parser", modelOutFile, model);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TaggerModelReplacerTool.java Fri May 2 12:34:23 2014
@@ -32,17 +32,17 @@ public final class TaggerModelReplacerTo
public String getShortDescription() {
return "replaces the tagger model in a parser model";
}
-
+
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " parser.model tagger.model";
}
public void run(String[] args) {
-
+
if (args.length != 2) {
System.out.println(getHelp());
} else {
-
+
File parserModelInFile = new File(args[0]);
ParserModel parserModel = new ParserModelLoader().load(parserModelInFile);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/TrainingParams.java Fri May 2 12:34:23 2014
@@ -25,26 +25,26 @@ import opennlp.tools.cmdline.params.Basi
/**
* TrainingParams for Parser.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
-
+
@ParameterDescription(valueName = "CHUNKING|TREEINSERT",
description = "one of CHUNKING or TREEINSERT, default is CHUNKING.")
@OptionalParameter(defaultValue = "CHUNKING")
String getParserType();
-
+
@ParameterDescription(valueName = "className", description = "head rules artifact serializer class name")
@OptionalParameter
String getHeadRulesSerializerImpl();
-
+
@ParameterDescription(valueName = "headRulesFile", description = "head rules file.")
File getHeadRules();
-
+
@ParameterDescription(valueName = "true|false", description = "Learn to generate function tags.")
@OptionalParameter(defaultValue = "false")
Boolean getFun();
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSEvaluationErrorListener.java Fri May 2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
/**
* A default implementation of {@link EvaluationMonitor} that prints
* to an output stream.
- *
+ *
*/
public class POSEvaluationErrorListener extends
EvaluationErrorPrinter<POSSample> implements POSTaggerEvaluationMonitor {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java Fri May 2 12:34:23 2014
@@ -34,7 +34,7 @@ public final class POSModelLoader extend
public POSModelLoader() {
super("POS Tagger");
}
-
+
@Override
protected POSModel loadModel(InputStream modelIn) throws IOException,
InvalidFormatException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java Fri May 2 12:34:23 2014
@@ -37,7 +37,7 @@ import opennlp.tools.util.model.ModelUti
public final class POSTaggerCrossValidatorTool
extends AbstractCrossValidatorTool<POSSample, CVToolParams> {
-
+
interface CVToolParams extends CVParams, TrainingParams {
@ParameterDescription(valueName = "outputFile",
description = "the path of the fine-grained report file.")
@@ -87,7 +87,7 @@ public final class POSTaggerCrossValidat
validator = new POSTaggerCrossValidator(params.getLang(), mlParams,
params.getDict(), params.getNgram(), params.getTagDictCutoff(),
params.getFactory(), missclassifiedListener, reportListener);
-
+
validator.evaluate(sampleStream, params.getFolds());
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerEvaluatorTool.java Fri May 2 12:34:23 2014
@@ -56,7 +56,7 @@ public final class POSTaggerEvaluatorToo
super.run(format, args);
POSModel model = new POSModelLoader().load(params.getModel());
-
+
POSTaggerEvaluationMonitor missclassifiedListener = null;
if (params.getMisclassified()) {
missclassifiedListener = new POSEvaluationErrorListener();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerFineGrainedReportListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerFineGrainedReportListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerFineGrainedReportListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerFineGrainedReportListener.java Fri May 2 12:34:23 2014
@@ -45,7 +45,7 @@ import opennlp.tools.util.eval.Mean;
* <p>
* It is possible to use it from an API and access the statistics using the
* provided getters
- *
+ *
*/
public class POSTaggerFineGrainedReportListener implements
POSTaggerEvaluationMonitor {
@@ -532,7 +532,7 @@ public class POSTaggerFineGrainedReportL
/**
* Includes a new evaluation data
- *
+ *
* @param tok
* the evaluated token
* @param ref
@@ -839,7 +839,7 @@ public class POSTaggerFineGrainedReportL
/**
* Creates a new {@link ConfusionMatrixLine}
- *
+ *
* @param ref
* the reference column
*/
@@ -849,7 +849,7 @@ public class POSTaggerFineGrainedReportL
/**
* Increments the counter for the given column and updates the statistics.
- *
+ *
* @param column
* the column to be incremented
*/
@@ -865,7 +865,7 @@ public class POSTaggerFineGrainedReportL
/**
* Gets the calculated accuracy of this element
- *
+ *
* @return the accuracy
*/
public double getAccuracy() {
@@ -880,7 +880,7 @@ public class POSTaggerFineGrainedReportL
/**
* Gets the value given a column
- *
+ *
* @param column
* the column
* @return the counter value
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java Fri May 2 12:34:23 2014
@@ -40,7 +40,7 @@ import opennlp.tools.util.model.ModelUti
public final class POSTaggerTrainerTool
extends AbstractTrainerTool<POSSample, TrainerToolParams> {
-
+
interface TrainerToolParams extends TrainingParams, TrainingToolParams {
}
@@ -51,7 +51,7 @@ public final class POSTaggerTrainerTool
public String getShortDescription() {
return "trains a model for the part-of-speech tagger";
}
-
+
public void run(String format, String[] args) {
super.run(format, args);
@@ -70,9 +70,9 @@ public final class POSTaggerTrainerTool
CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);
Dictionary ngramDict = null;
-
+
Integer ngramCutoff = params.getNgram();
-
+
if (ngramCutoff != null) {
System.err.print("Building ngram dictionary ... ");
try {
@@ -140,23 +140,23 @@ public final class POSTaggerTrainerTool
// sorry that this can fail
}
}
-
+
CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
}
-
+
static ModelType getModelType(String modelString) {
ModelType model;
if (modelString == null)
modelString = "maxent";
-
+
if (modelString.equals("maxent")) {
- model = ModelType.MAXENT;
+ model = ModelType.MAXENT;
}
else if (modelString.equals("perceptron")) {
- model = ModelType.PERCEPTRON;
+ model = ModelType.PERCEPTRON;
}
else if (modelString.equals("perceptron_sequence")) {
- model = ModelType.PERCEPTRON_SEQUENCE;
+ model = ModelType.PERCEPTRON_SEQUENCE;
}
else {
model = null;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java Fri May 2 12:34:23 2014
@@ -25,27 +25,27 @@ import opennlp.tools.cmdline.params.Basi
/**
* TrainingParameters for Name Finder.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
-
+
@ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence", description = "The type of the token name finder model. One of maxent|perceptron|perceptron_sequence.")
@OptionalParameter(defaultValue = "maxent")
String getType();
-
+
@ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
@OptionalParameter
File getDict();
-
+
@ParameterDescription(valueName = "cutoff", description = "NGram cutoff. If not specified will not create ngram dictionary.")
@OptionalParameter
Integer getNgram();
-
+
@ParameterDescription(valueName = "tagDictCutoff", description = "TagDictionary cutoff. If specified will create/expand a mutable TagDictionary")
@OptionalParameter
Integer getTagDictCutoff();
-
+
@ParameterDescription(valueName = "factoryName", description = "A sub-class of POSTaggerFactory where to get implementation and resources.")
@OptionalParameter
String getFactory();