You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/07/19 10:35:49 UTC

svn commit: r1148230 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind: NameFinderME.java NameFinderSequenceValidator.java

Author: joern
Date: Tue Jul 19 08:35:49 2011
New Revision: 1148230

URL: http://svn.apache.org/viewvc?rev=1148230&view=rev
Log:
OPENNLP-228 Moved sequence validator out of the name finder, and made it interchangeable

Added:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java   (with props)
Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1148230&r1=1148229&r2=1148230&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Tue Jul 19 08:35:49 2011
@@ -65,39 +65,7 @@ public class NameFinderME implements Tok
   public static final int DEFAULT_BEAM_SIZE = 3;
   private static final Pattern typedOutcomePattern = Pattern.compile("(.+)-\\w+");
 
-  private static class NameFinderSequenceValidator implements
-      SequenceValidator<String> {
-    
-    public boolean validSequence(int i, String[] inputSequence,
-        String[] outcomesSequence, String outcome) {
-      
-      // outcome is formatted like "cont" or "sometype-cont", so we
-      // can check if it ends with "cont".
-      if (outcome.endsWith(CONTINUE)) {
-        
-        int li = outcomesSequence.length - 1;
-        
-        if (li == -1) {
-          return false;
-        } else if (outcomesSequence[li].endsWith(OTHER)) {
-          return false;
-        } else if (outcomesSequence[li].endsWith(CONTINUE)) {
-          // if it is continue, we have to check if previous match was of the same type 
-          String previousNameType = extractNameType(outcomesSequence[li]);
-          String nameType = extractNameType(outcome);
-          if( previousNameType != null || nameType != null ) {
-            if( nameType != null ) {
-              if( nameType.equals(previousNameType) ){
-                return true;
-              }
-            }
-            return false; // outcomes types are not equal
-          }
-        }
-      }
-      return true;
-    }
-  }
+
 
   public static final String START = "start";
   public static final String CONTINUE = "cont";
@@ -121,7 +89,8 @@ public class NameFinderME implements Tok
    * @param model
    * @param beamSize
    */
-  public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+  public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize,
+      SequenceValidator<String> sequenceValidator) {
     this.model = model.getNameFinderModel();
     
     // If generator is provided always use that one
@@ -141,10 +110,17 @@ public class NameFinderME implements Tok
     contextGenerator.addFeatureGenerator(
           new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
     
+    if (sequenceValidator == null)
+      sequenceValidator = new NameFinderSequenceValidator();
+    
     beam = new BeamSearch<String>(beamSize, contextGenerator, this.model,
-        new NameFinderSequenceValidator(), beamSize);
+        sequenceValidator, beamSize);
   }
 
+  public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+    this(model, generator, beamSize, null);
+  }
+  
   public NameFinderME(TokenNameFinderModel model, int beamSize) {
     this(model, null, beamSize);
   }
@@ -493,7 +469,7 @@ public class NameFinderME implements Tok
    * @param outcome the outcome
    * @return the name type, or null if not set
    */
-  private static final String extractNameType(String outcome) {
+  static final String extractNameType(String outcome) {
     Matcher matcher = typedOutcomePattern.matcher(outcome);
     if(matcher.matches()) {
       String nameType = matcher.group(1);

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java?rev=1148230&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java Tue Jul 19 08:35:49 2011
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import opennlp.tools.util.SequenceValidator;
+
+public class NameFinderSequenceValidator implements
+    SequenceValidator<String> {
+  // Returns whether 'outcome' may follow outcomesSequence; i and inputSequence are not read.
+  public boolean validSequence(int i, String[] inputSequence,
+      String[] outcomesSequence, String outcome) {
+    // Only outcomes ending in CONTINUE are restricted; anything else reaches the final return true.
+    // outcome is formatted like "cont" or "sometype-cont", so we
+    // can check if it ends with "cont".
+    if (outcome.endsWith(NameFinderME.CONTINUE)) {
+      // li indexes the most recent outcome; -1 means no outcome has been chosen yet
+      int li = outcomesSequence.length - 1;
+      // NOTE(review): a previous outcome ending in START is never type-checked - confirm intended
+      if (li == -1) {
+        return false;
+      } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
+        return false;
+      } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE)) {
+        // both are "cont": accept only equal name types; if neither has a type, fall through to true
+        String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
+        String nameType = NameFinderME.extractNameType(outcome);
+        if( previousNameType != null || nameType != null ) {
+          if( nameType != null ) {
+            if( nameType.equals(previousNameType) ){
+              return true;
+            }
+          }
+          return false; // outcomes types are not equal
+        }
+      }
+    }
+    return true;
+  }
+}
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain