You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/03/21 19:24:35 UTC

opennlp git commit: OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

Repository: opennlp
Updated Branches:
  refs/heads/master c2abe862e -> 19a56adb0


OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

This issue closes #144


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/19a56adb
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/19a56adb
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/19a56adb

Branch: refs/heads/master
Commit: 19a56adb00295d528015045cacd443ff10f7bcb2
Parents: c2abe86
Author: Peter Thygesen <pe...@gmail.com>
Authored: Thu Mar 16 11:19:58 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Mar 21 20:22:40 2017 +0100

----------------------------------------------------------------------
 .../java/opennlp/tools/namefind/BilouCodec.java |  61 +++
 .../opennlp/tools/namefind/BilouCodecTest.java  | 375 +++++++++++++++++++
 2 files changed, 436 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/19a56adb/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
index 7e8508a..50cc4bf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
@@ -19,7 +19,9 @@ package opennlp.tools.namefind;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import opennlp.tools.util.SequenceCodec;
 import opennlp.tools.util.SequenceValidator;
@@ -111,8 +113,67 @@ public class BilouCodec implements SequenceCodec<String> {
     return new BilouNameFinderSequenceValidator();
   }
 
+  /**
+   * Checks that the outcomes form a usable BILOU tag set. Every outcome must end with
+   * the START, CONTINUE, LAST or UNIT suffix, or equal OTHER; anything else is rejected.
+   * Each START type needs a LAST of the same type and each LAST type needs a START; a
+   * CONTINUE type therefore needs both. UNIT needs no partner. At least one START or
+   * UNIT outcome must be present, so an empty or OTHER-only array is rejected.
+   *
+   * @param outcomes all possible model outcomes
+   *
+   * @return true, if model outcomes are compatible
+   */
+  @Override
+  public boolean areOutcomesCompatible(String[] outcomes) {
+    Set<String> start = new HashSet<>(); // each set holds the text left of the suffix
+    Set<String> cont = new HashSet<>();
+    Set<String> last = new HashSet<>();
+    Set<String> unit = new HashSet<>();
+
+    for (int i = 0; i < outcomes.length; i++) {
+      String outcome = outcomes[i];
+      if (outcome.endsWith(BilouCodec.START)) {
+        start.add(outcome.substring(0, outcome.length()
+            - BilouCodec.START.length()));
+      } else if (outcome.endsWith(BilouCodec.CONTINUE)) {
+        cont.add(outcome.substring(0, outcome.length()
+            - BilouCodec.CONTINUE.length()));
+      } else if (outcome.endsWith(BilouCodec.LAST)) {
+        last.add(outcome.substring(0, outcome.length()
+            - BilouCodec.LAST.length()));
+      } else if (outcome.endsWith(BilouCodec.UNIT)) {
+        unit.add(outcome.substring(0, outcome.length()
+            - BilouCodec.UNIT.length()));
+      } else if (!outcome.equals(BilouCodec.OTHER)) {
+        return false; // neither a known suffix nor OTHER
+      }
+    }
+
+    if (start.size() == 0 && unit.size() == 0) {
+      return false; // no Start or Unit outcome present
+    } else {
+      // Start, must have matching Last
+      for (String startPrefix : start) {
+        if (!last.contains(startPrefix)) {
+          return false;
+        }
+      }
+      // Cont fails here only if it has neither Start nor Last; the loops around it catch a lone half
+      for (String contPrefix : cont) {
+        if (!start.contains(contPrefix) && !last.contains(contPrefix)) {
+          return false;
+        }
+      }
+      // Last, must have matching Start
+      for (String lastPrefix : last) {
+        if (!start.contains(lastPrefix)) {
+          return false;
+        }
+      }
+
+    }
+
     return true;
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/19a56adb/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
index 96d939f..353c7e4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
@@ -206,4 +206,379 @@ public class BilouCodecTest {
     Assert.assertArrayEquals(expected, actual);
   }
 
+
+  @Test
+  public void testCompatibilityEmpty() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  /**
+   * Singles and singles in combination with other valid type (unit/start+last)
+   */
+
+  /**
+   * B-Start => Fail
+   * A-Unit, B-Start => Fail
+   * A-Start, A-Last, B-Start => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesStart() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START}));
+  }
+
+  /**
+   * B-Continue => Fail
+   * A-Unit, B-Continue => Fail
+   * A-Start, A-Last, B-Continue => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE}));
+  }
+
+  /**
+   * B-Last => Fail
+   * A-Unit, B-Last => Fail
+   * A-Start, A-Last, B-Last => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesLast() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST}));
+  }
+
+  /**
+   * Other => Fail
+   * A-Unit, Other => Pass
+   * A-Start, A-Last, Other => Pass
+   */
+  @Test
+  public void testCompatibilitySinglesOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER}));
+  }
+
+  /**
+   * B-Unit => Pass
+   * A-Unit, B-Unit => Pass
+   * A-Start, A-Last, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilitySinglesUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_UNIT}));
+  }
+
+  /**
+   * Doubles and doubles in combination with other valid type (unit/start+last)
+   *
+   * B-Start, B-Continue => Fail
+   * A-Unit, B-Start, B-Continue => Fail
+   * A-Start, A-Last, B-Start, B-Continue => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_CONTINUE}));
+  }
+
+  /**
+   * B-Start, B-Last => Pass
+   * A-Unit, B-Start, B-Last => Pass
+   * A-Start, A-Last, B-Start, B-Last => Pass
+   */
+  @Test
+  public void testCompatibilityStartLast() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_START, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_LAST}));
+  }
+
+  /**
+   * B-Start, Other => Fail
+   * A-Unit, B-Start, Other => Fail
+   * A-Start, A-Last, B-Start, Other => Fail
+   */
+  @Test
+  public void testCompatibilityStartOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Unit => Fail
+   * A-Unit, B-Start, B-Unit => Fail
+   * A-Start, A-Last, B-Start, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityStartUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_UNIT}));
+  }
+
+  /**
+   * B-Continue, B-Last => Fail
+   * A-Unit, B-Continue, B-Last => Fail
+   * A-Start, A-Last, B-Continue, B-Last => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLast() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_LAST}));
+  }
+
+  /**
+   * B-Continue, Other => Fail
+   * A-Unit, B-Continue, Other => Fail
+   * A-Start, A-Last, B-Continue, Other => Fail
+   */
+  @Test
+  public void testCompatibilityContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, OTHER}));
+  }
+
+  /**
+   * B-Continue, B-Unit => Fail
+   * A-Unit, B-Continue, B-Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_UNIT}));
+  }
+
+  /**
+   * B-Last, Other => Fail
+   * A-Unit, B-Last, Other => Fail
+   * A-Start, A-Last, B-Last, Other => Fail
+   */
+  @Test
+  public void testCompatibilityLastOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Last, B-Unit => Fail
+   * A-Unit, B-Last, B-Unit => Fail
+   * A-Start, A-Last, B-Last, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityLastUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, B_UNIT}));
+  }
+
+  /**
+   * Other, B-Unit => Pass
+   * A-Unit, Other, B-Unit => Pass
+   * A-Start, A-Last, Other, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityOtherUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Triples and triples in combination with other valid type (unit/start+last)
+   *
+   * B-Start, B-Continue, B-Last => Pass
+   * A-Unit, B-Start, B-Continue, B-Last => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLast() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST}));
+  }
+
+  /**
+   * B-Start, B-Continue, Other => Fail
+   * A-Unit, B-Start, B-Continue, Other => Fail
+   * A-Start, A-Last, B-Start, B-Continue, Other => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Continue, B-Unit => Fail
+   * A-Unit, B-Start, B-Continue, B-Unit => Fail
+   * A-Start, A-Last, B-Start, B-Continue, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinueUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_UNIT}));
+  }
+
+  /**
+   * B-Continue, B-Last, Other => Fail
+   * A-Unit, B-Continue, B-Last, Other => Fail
+   * A-Start, A-Last, B-Continue, B-Last, Other => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Continue, B-Last, B-Unit => Fail
+   * A-Unit, B-Continue, B-Last, B-Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Last, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, B_UNIT}));
+  }
+
+  /**
+   * B-Last, Other, B-Unit => Fail
+   * A-Unit, B-Last, Other, B-Unit => Fail
+   * A-Start, A-Last, B-Last, Other, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityLastOtherUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Quadruples and quadruple in combination of unit/start+last
+   *
+   * B-Start, B-Continue, B-Last, Other => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, Other => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, Other => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLastOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Continue, B-Last, B-Unit => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, B-Unit => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLastUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, B_UNIT}));
+  }
+
+
+  /**
+   * B-Continue, B-Last, Other, B-Unit => Fail
+   * A-Unit, B-Continue, B-Last, Other, B-Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Last, Other, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastOtherUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Quintuple
+   *
+   * B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityUnitOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Multiclass
+   */
+  @Test
+  public void testCompatibilityMultiClass() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_UNIT, A_CONTINUE, A_LAST, A_UNIT,
+            B_START, B_LAST, A_START, C_UNIT, OTHER}));
+  }
+
+  /**
+   * Bad combinations
+   */
+  @Test
+  public void testCompatibilityBadTag() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_CONTINUE, OTHER, "BAD"}));
+  }
+
+  @Test
+  public void testCompatibilityWrongClass() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, B_LAST, OTHER}));
+  }
+
+
+
 }