You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/07/19 10:35:49 UTC
svn commit: r1148230 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind:
NameFinderME.java NameFinderSequenceValidator.java
Author: joern
Date: Tue Jul 19 08:35:49 2011
New Revision: 1148230
URL: http://svn.apache.org/viewvc?rev=1148230&view=rev
Log:
OPENNLP-228 Moved sequence validator out of the name finder, and made it interchangeable
Added:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java (with props)
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1148230&r1=1148229&r2=1148230&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Tue Jul 19 08:35:49 2011
@@ -65,39 +65,7 @@ public class NameFinderME implements Tok
public static final int DEFAULT_BEAM_SIZE = 3;
private static final Pattern typedOutcomePattern = Pattern.compile("(.+)-\\w+");
- private static class NameFinderSequenceValidator implements
- SequenceValidator<String> {
-
- public boolean validSequence(int i, String[] inputSequence,
- String[] outcomesSequence, String outcome) {
-
- // outcome is formatted like "cont" or "sometype-cont", so we
- // can check if it ends with "cont".
- if (outcome.endsWith(CONTINUE)) {
-
- int li = outcomesSequence.length - 1;
-
- if (li == -1) {
- return false;
- } else if (outcomesSequence[li].endsWith(OTHER)) {
- return false;
- } else if (outcomesSequence[li].endsWith(CONTINUE)) {
- // if it is continue, we have to check if previous match was of the same type
- String previousNameType = extractNameType(outcomesSequence[li]);
- String nameType = extractNameType(outcome);
- if( previousNameType != null || nameType != null ) {
- if( nameType != null ) {
- if( nameType.equals(previousNameType) ){
- return true;
- }
- }
- return false; // outcomes types are not equal
- }
- }
- }
- return true;
- }
- }
+
public static final String START = "start";
public static final String CONTINUE = "cont";
@@ -121,7 +89,8 @@ public class NameFinderME implements Tok
* @param model
* @param beamSize
*/
- public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+ public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize,
+ SequenceValidator<String> sequenceValidator) {
this.model = model.getNameFinderModel();
// If generator is provided always use that one
@@ -141,10 +110,17 @@ public class NameFinderME implements Tok
contextGenerator.addFeatureGenerator(
new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
+ if (sequenceValidator == null)
+ sequenceValidator = new NameFinderSequenceValidator();
+
beam = new BeamSearch<String>(beamSize, contextGenerator, this.model,
- new NameFinderSequenceValidator(), beamSize);
+ sequenceValidator, beamSize);
}
+ public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
+ this(model, generator, beamSize, null); // null validator -> a new NameFinderSequenceValidator is used
+ }
+
public NameFinderME(TokenNameFinderModel model, int beamSize) {
this(model, null, beamSize); // delegates with no feature generator supplied (null)
}
@@ -493,7 +469,7 @@ public class NameFinderME implements Tok
* @param outcome the outcome
* @return the name type, or null if not set
*/
- private static final String extractNameType(String outcome) {
+ static final String extractNameType(String outcome) {
Matcher matcher = typedOutcomePattern.matcher(outcome);
if(matcher.matches()) {
String nameType = matcher.group(1);
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java?rev=1148230&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java Tue Jul 19 08:35:49 2011
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import opennlp.tools.util.SequenceValidator;
+
+/** Rejects a "cont" outcome that does not extend a name of the same type. */
+public class NameFinderSequenceValidator implements SequenceValidator<String> {
+  /**
+   * Checks whether {@code outcome} may follow the outcomes chosen so far.
+   *
+   * @param i not used by this validator
+   * @param inputSequence not used by this validator
+   * @param outcomesSequence outcomes already chosen for the preceding tokens
+   * @param outcome candidate outcome for the current token
+   * @return false if outcome is a "cont" that follows nothing, follows "other",
+   *         or follows a "start"/"cont" of a different name type; true otherwise
+   */
+  public boolean validSequence(int i, String[] inputSequence, String[] outcomesSequence, String outcome) {
+    // Outcomes look like "cont" or "sometype-cont"; only those are constrained.
+    if (!outcome.endsWith(NameFinderME.CONTINUE)) {
+      return true;
+    }
+    if (outcomesSequence.length == 0) {
+      return false; // nothing to continue at the start of the sequence
+    }
+    String previous = outcomesSequence[outcomesSequence.length - 1];
+    if (previous.endsWith(NameFinderME.OTHER)) {
+      return false; // "cont" may not directly follow an OTHER outcome
+    }
+    if (previous.endsWith(NameFinderME.CONTINUE) || previous.endsWith(NameFinderME.START)) {
+      // A name may not change type midway; the START case used to be accepted unchecked.
+      String previousType = NameFinderME.extractNameType(previous);
+      String type = NameFinderME.extractNameType(outcome);
+      return type == null ? previousType == null : type.equals(previousType);
+    }
+    return true;
+  }
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain