You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/07 11:08:03 UTC
svn commit: r1575214 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools:
namefind/BilouCodec.java namefind/BioCodec.java util/SequenceCodec.java
Author: joern
Date: Fri Mar 7 10:08:02 2014
New Revision: 1575214
URL: http://svn.apache.org/r1575214
Log:
OPENNLP-658 Added a method to validate model outcomes against the codec
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java Fri Mar 7 10:08:02 2014
@@ -114,4 +114,8 @@ public class BilouCodec implements Seque
return new BilouNameFinderSequenceValidator();
}
+ @Override
+ public boolean areOutcomesCompatible(String[] outcomes) {
+ return true; // no validation is performed here: every outcome array is accepted
+ }
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java Fri Mar 7 10:08:02 2014
@@ -106,4 +106,43 @@ public class BioCodec implements Sequenc
public NameFinderSequenceValidator createSequenceValidator() {
return new NameFinderSequenceValidator();
}
+
+ @Override
+ public boolean areOutcomesCompatible(String[] outcomes) {
+ // Allowed outcomes: *optionally* one equal to NameFinderME.OTHER ("other"), some named
+ // xyz-start, and sometimes a matching xyz-cont. Any other outcome is rejected.
+ // To validate the model we check that there is at least one outcome with the START
+ // suffix (an OTHER outcome is not required). After that we check that every outcome
+ // that ends with "cont" has a counterpart with the same prefix that ends with "start".
+ List<String> start = new ArrayList<String>();
+ List<String> cont = new ArrayList<String>();
+
+ for (int i = 0; i < outcomes.length; i++) {
+ String outcome = outcomes[i];
+ if (outcome.endsWith(NameFinderME.START)) {
+ start.add(outcome.substring(0, outcome.length()
+ - NameFinderME.START.length()));
+ } else if (outcome.endsWith(NameFinderME.CONTINUE)) {
+ cont.add(outcome.substring(0, outcome.length()
+ - NameFinderME.CONTINUE.length()));
+ } else if (outcome.equals(NameFinderME.OTHER)) {
+ // an outcome equal to OTHER is accepted but not required, so nothing is recorded
+ } else {
+ // unexpected outcome: not START/CONTINUE-suffixed and not equal to OTHER
+ return false;
+ }
+ }
+
+ if (start.size() == 0) { // no START-suffixed outcome at all: incompatible
+ return false;
+ } else {
+ for (String contPreffix : cont) {
+ if (!start.contains(contPreffix)) { // CONTINUE outcome without a START with the same prefix
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java?rev=1575214&r1=1575213&r2=1575214&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java Fri Mar 7 10:08:02 2014
@@ -21,7 +21,38 @@ import java.util.List;
public interface SequenceCodec<T> {
+ /**
+ * Decodes a sequence of T objects into Span objects.
+ *
+ * @param c the sequence of T objects to decode
+ *
+ * @return the Span objects decoded from the sequence
+ */
Span[] decode(List<T> c);
+
+ /**
+ * Encodes Span objects into a sequence of T objects.
+ *
+ * @param names the Span objects to encode
+ * @param length presumably the length of the sequence to produce - TODO confirm
+ *
+ * @return the encoded sequence of T objects
+ */
T[] encode(Span names[], int length);
- public SequenceValidator<T> createSequenceValidator();
+
+ /**
+ * Creates a sequence validator which can validate a sequence of outcomes.
+ *
+ * @return a new SequenceValidator for sequences of T outcomes
+ */
+ SequenceValidator<T> createSequenceValidator();
+
+ /**
+ * Checks if the outcomes of the model are compatible with the codec.
+ *
+ * @param outcomes all possible model outcomes
+ *
+ * @return true if the outcomes are compatible with the codec, otherwise false
+ */
+ boolean areOutcomesCompatible(String[] outcomes);
}