You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/07 11:08:03 UTC

svn commit: r1575214 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: namefind/BilouCodec.java namefind/BioCodec.java util/SequenceCodec.java

Author: joern
Date: Fri Mar  7 10:08:02 2014
New Revision: 1575214

URL: http://svn.apache.org/r1575214
Log:
OPENNLP-658 Added a method to validate model outcomes against the codec

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java Fri Mar  7 10:08:02 2014
@@ -114,4 +114,8 @@ public class BilouCodec implements Seque
     return new BilouNameFinderSequenceValidator();
   }
   
+  @Override
+  public boolean areOutcomesCompatible(String[] outcomes) {
+    return true; // no checks are performed here: every outcome array is reported as compatible
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java Fri Mar  7 10:08:02 2014
@@ -106,4 +106,43 @@ public class BioCodec implements Sequenc
   public NameFinderSequenceValidator createSequenceValidator() {
     return new NameFinderSequenceValidator(); // a new validator instance is built on each call
   }
+  
+  @Override
+  public boolean areOutcomesCompatible(String[] outcomes) {
+    // Every outcome must either equal NameFinderME.OTHER or end with the START or CONTINUE
+    // suffix; any other outcome makes the set incompatible. The OTHER outcome is optional.
+    // At least one outcome ending with the START suffix is required. Finally, every
+    // outcome ending with the CONTINUE suffix must share its prefix with some outcome
+    // that ends with the START suffix.
+    List<String> start = new ArrayList<String>();
+    List<String> cont = new ArrayList<String>();
+
+    for (int i = 0; i < outcomes.length; i++) {
+      String outcome = outcomes[i];
+      if (outcome.endsWith(NameFinderME.START)) {
+        start.add(outcome.substring(0, outcome.length()
+            - NameFinderME.START.length())); // keep only the prefix in front of the suffix
+      } else if (outcome.endsWith(NameFinderME.CONTINUE)) {
+        cont.add(outcome.substring(0, outcome.length()
+            - NameFinderME.CONTINUE.length())); // keep only the prefix in front of the suffix
+      } else if (outcome.equals(NameFinderME.OTHER)) {
+        // OTHER is accepted but not required, so there is nothing to record here
+      } else {
+        // the outcome matches none of the accepted forms
+        return false;
+      }
+    }
+
+    if (start.size() == 0) {
+      return false;
+    } else {
+      for (String contPreffix : cont) {
+        if (!start.contains(contPreffix)) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java Fri Mar  7 10:08:02 2014
@@ -21,7 +21,38 @@ import java.util.List;
 
 public interface SequenceCodec<T> {
 
+  /**
+   * Decodes a sequence of T objects into Span objects.
+   * 
+   * @param c the sequence of T objects to decode
+   * 
+   * @return the Span objects decoded from the sequence
+   */
   Span[] decode(List<T> c);
+  
+  /**
+   * Encodes Span objects into a sequence of T objects.
+   * 
+   * @param names the Span objects to encode
+   * @param length presumably the length of the sequence to produce (TODO confirm)
+   * 
+   * @return the sequence of T objects encoding the given spans
+   */
   T[] encode(Span names[], int length);
-  public SequenceValidator<T> createSequenceValidator();
+  
+  /**
+   * Creates a sequence validator which can validate a sequence of outcomes.
+   * 
+   * @return the created sequence validator
+   */
+  SequenceValidator<T> createSequenceValidator();
+  
+  /**
+   * Checks if the outcomes of the model are compatible with the codec.
+   * 
+   * @param outcomes all possible model outcomes
+   * 
+   * @return true if the outcomes are compatible with the codec, false otherwise
+   */
+  boolean areOutcomesCompatible(String[] outcomes);
 }