You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/13 01:19:03 UTC

svn commit: r1145814 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/tokenizer/ tokenize/

Author: colen
Date: Tue Jul 12 23:19:02 2011
New Revision: 1145814

URL: http://svn.apache.org/viewvc?rev=1145814&view=rev
Log:
OPENNLP-220 Added printErrors to Tokenizer evaluation tools

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Tue Jul 12 23:19:02 2011
@@ -84,7 +84,7 @@ public final class TokenizerCrossValidat
     }
       
     try {
-      validator.evaluate(sampleStream, 10);
+      validator.evaluate(sampleStream, 10, params.getPrintErrors());
     }
     catch (IOException e) {
       CmdLineUtil.printTrainingIoError(e);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java Tue Jul 12 23:19:02 2011
@@ -61,7 +61,7 @@ public final class TokenizerMEEvaluatorT
     TokenizerModel model = new TokenizerModelLoader().load(params.getModel());
 
     TokenizerEvaluator evaluator = new TokenizerEvaluator(
-        new opennlp.tools.tokenize.TokenizerME(model));
+        new opennlp.tools.tokenize.TokenizerME(model), params.getPrintErrors());
 
     System.out.print("Evaluating ... ");
     

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java Tue Jul 12 23:19:02 2011
@@ -60,8 +60,35 @@ public class TokenizerCrossValidator {
   }
   
   
-  public void evaluate(ObjectStream<TokenSample> samples, int nFolds) 
+  /**
+   * Starts the evaluation.
+   * 
+   * @param samples
+   *          the data to train and test
+   * @param nFolds
+   *          number of folds
+   * 
+   * @throws IOException
+   */
+  public void evaluate(ObjectStream<TokenSample> samples, int nFolds)
       throws IOException {
+    evaluate(samples, nFolds, false);
+  }
+
+  /**
+   * Starts the evaluation.
+   * 
+   * @param samples
+   *          the data to train and test
+   * @param nFolds
+   *          number of folds
+   * @param printErrors
+   *          if true will print errors
+   * 
+   * @throws IOException
+   */
+  public void evaluate(ObjectStream<TokenSample> samples, int nFolds,
+      boolean printErrors) throws IOException {
     
     CrossValidationPartitioner<TokenSample> partitioner = 
       new CrossValidationPartitioner<TokenSample>(samples, nFolds);
@@ -83,7 +110,7 @@ public class TokenizerCrossValidator {
              alphaNumericOptimization, params);
        }
        
-       TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model));
+       TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model), printErrors);
        evaluator.evaluate(trainingSampleStream.getTestSampleStream());
        fmeasure.mergeInto(evaluator.getFMeasure());
      }

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java?rev=1145814&r1=1145813&r2=1145814&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java Tue Jul 12 23:19:02 2011
@@ -40,7 +40,19 @@ public class TokenizerEvaluator extends 
    * predicted tokens.
    */
   private Tokenizer tokenizer;
-
+  
+  /**
+   * Initializes the current instance with the
+   * given {@link Tokenizer}.
+   *
+   * @param tokenizer the {@link Tokenizer} to evaluate.
+   * @param printError should print detailed output
+   */
+  public TokenizerEvaluator(Tokenizer tokenizer, boolean printErrors) {
+    super(printErrors);
+    this.tokenizer = tokenizer;
+  }
+  
   /**
    * Initializes the current instance with the
    * given {@link Tokenizer}.
@@ -48,6 +60,7 @@ public class TokenizerEvaluator extends 
    * @param tokenizer the {@link Tokenizer} to evaluate.
    */
   public TokenizerEvaluator(Tokenizer tokenizer) {
+    super();
     this.tokenizer = tokenizer;
   }
 
@@ -61,9 +74,17 @@ public class TokenizerEvaluator extends 
    * @param reference the reference {@link TokenSample}.
    */
   public void evaluateSample(TokenSample reference) {
-    Span predictedSpans[] = tokenizer.tokenizePos(reference.getText());
+    Span predictions[] = tokenizer.tokenizePos(reference.getText());
+
+    Span[] references = reference.getTokenSpans();
+
+    if (isPrintError()) {
+      String doc = reference.getText();
+      printErrors(references, predictions, reference, new TokenSample(doc,
+          predictions), doc);
+    }
 
-    fmeasure.updateScores(reference.getTokenSpans(), predictedSpans);
+    fmeasure.updateScores(reference.getTokenSpans(), predictions);
   }
   
   public FMeasure getFMeasure() {



Re: svn commit: r1145814 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/tokenizer/ tokenize/

Posted by "william.colen@gmail.com" <wi...@gmail.com>.
On Wed, Jul 13, 2011 at 4:49 AM, Jörn Kottmann <ko...@gmail.com> wrote:

> On 7/13/11 1:19 AM, colen@apache.org wrote:
>
>>    public TokenizerEvaluator(Tokenizer tokenizer) {
>> +    super();
>>      this.tokenizer = tokenizer;
>>    }
>>
>
> No reason to call super here, it is called implicitly.
>

Hi Jörn,

Thank you for pointing it out. I was using this call to force a compilation
error while I was refactoring the code and forgot to remove it.

William

Re: svn commit: r1145814 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/tokenizer/ tokenize/

Posted by Jörn Kottmann <ko...@gmail.com>.
On 7/13/11 1:19 AM, colen@apache.org wrote:
>     public TokenizerEvaluator(Tokenizer tokenizer) {
> +    super();
>       this.tokenizer = tokenizer;
>     }

No reason to call super here, it is called implicitly.

Jörn