You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/13 01:19:03 UTC
svn commit: r1145814 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
cmdline/tokenizer/ tokenize/
Author: colen
Date: Tue Jul 12 23:19:02 2011
New Revision: 1145814
URL: http://svn.apache.org/viewvc?rev=1145814&view=rev
Log:
OPENNLP-220 Added printErrors to Tokenizer evaluation tools
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Tue Jul 12 23:19:02 2011
@@ -84,7 +84,7 @@ public final class TokenizerCrossValidat
}
try {
- validator.evaluate(sampleStream, 10);
+ validator.evaluate(sampleStream, 10, params.getPrintErrors());
}
catch (IOException e) {
CmdLineUtil.printTrainingIoError(e);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java Tue Jul 12 23:19:02 2011
@@ -61,7 +61,7 @@ public final class TokenizerMEEvaluatorT
TokenizerModel model = new TokenizerModelLoader().load(params.getModel());
TokenizerEvaluator evaluator = new TokenizerEvaluator(
- new opennlp.tools.tokenize.TokenizerME(model));
+ new opennlp.tools.tokenize.TokenizerME(model), params.getPrintErrors());
System.out.print("Evaluating ... ");
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java Tue Jul 12 23:19:02 2011
@@ -60,8 +60,35 @@ public class TokenizerCrossValidator {
}
- public void evaluate(ObjectStream<TokenSample> samples, int nFolds)
+ /**
+ * Starts the evaluation.
+ *
+ * @param samples
+ * the data to train and test
+ * @param nFolds
+ * number of folds
+ *
+ * @throws IOException
+ */
+ public void evaluate(ObjectStream<TokenSample> samples, int nFolds)
throws IOException {
+ evaluate(samples, nFolds, false);
+ }
+
+ /**
+ * Starts the evaluation.
+ *
+ * @param samples
+ * the data to train and test
+ * @param nFolds
+ * number of folds
+ * @param printErrors
+ * if true will print errors
+ *
+ * @throws IOException
+ */
+ public void evaluate(ObjectStream<TokenSample> samples, int nFolds,
+ boolean printErrors) throws IOException {
CrossValidationPartitioner<TokenSample> partitioner =
new CrossValidationPartitioner<TokenSample>(samples, nFolds);
@@ -83,7 +110,7 @@ public class TokenizerCrossValidator {
alphaNumericOptimization, params);
}
- TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model));
+ TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model), printErrors);
evaluator.evaluate(trainingSampleStream.getTestSampleStream());
fmeasure.mergeInto(evaluator.getFMeasure());
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java Tue Jul 12 23:19:02 2011
@@ -40,7 +40,19 @@ public class TokenizerEvaluator extends
* predicted tokens.
*/
private Tokenizer tokenizer;
-
+
+ /**
+ * Initializes the current instance with the
+ * given {@link Tokenizer}.
+ *
+ * @param tokenizer the {@link Tokenizer} to evaluate.
+ * @param printError should print detailed output
+ */
+ public TokenizerEvaluator(Tokenizer tokenizer, boolean printErrors) {
+ super(printErrors);
+ this.tokenizer = tokenizer;
+ }
+
/**
* Initializes the current instance with the
* given {@link Tokenizer}.
@@ -48,6 +60,7 @@ public class TokenizerEvaluator extends
* @param tokenizer the {@link Tokenizer} to evaluate.
*/
public TokenizerEvaluator(Tokenizer tokenizer) {
+ super();
this.tokenizer = tokenizer;
}
@@ -61,9 +74,17 @@ public class TokenizerEvaluator extends
* @param reference the reference {@link TokenSample}.
*/
public void evaluateSample(TokenSample reference) {
- Span predictedSpans[] = tokenizer.tokenizePos(reference.getText());
+ Span predictions[] = tokenizer.tokenizePos(reference.getText());
+
+ Span[] references = reference.getTokenSpans();
+
+ if (isPrintError()) {
+ String doc = reference.getText();
+ printErrors(references, predictions, reference, new TokenSample(doc,
+ predictions), doc);
+ }
- fmeasure.updateScores(reference.getTokenSpans(), predictedSpans);
+ fmeasure.updateScores(reference.getTokenSpans(), predictions);
}
public FMeasure getFMeasure() {
Re: svn commit: r1145814 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
cmdline/tokenizer/ tokenize/
Posted by "william.colen@gmail.com" <wi...@gmail.com>.
On Wed, Jul 13, 2011 at 4:49 AM, Jörn Kottmann <ko...@gmail.com> wrote:
> On 7/13/11 1:19 AM, colen@apache.org wrote:
>
>> public TokenizerEvaluator(Tokenizer tokenizer) {
>> + super();
>> this.tokenizer = tokenizer;
>> }
>>
>
> No reason to call super here, it is called implicitly.
>
Hi Jörn,
Thank you for pointing it out. I was using this call to force a compilation
error while I was refactoring the code and forgot to remove it.
William
Re: svn commit: r1145814 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
cmdline/tokenizer/ tokenize/
Posted by Jörn Kottmann <ko...@gmail.com>.
On 7/13/11 1:19 AM, colen@apache.org wrote:
> public TokenizerEvaluator(Tokenizer tokenizer) {
> + super();
> this.tokenizer = tokenizer;
> }
No reason to call super here, it is called implicitly.
Jörn