You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/08/01 16:21:36 UTC

svn commit: r1368013 - /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java

Author: colen
Date: Wed Aug  1 14:21:35 2012
New Revision: 1368013

URL: http://svn.apache.org/viewvc?rev=1368013&view=rev
Log:
OPENNLP-530: Changed the AD NameFinder sample stream to handle contractions whose <-sam> tag is missing (as in the Amazonia corpus)

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java?rev=1368013&r1=1368012&r2=1368013&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java Wed Aug  1 14:21:35 2012
@@ -264,26 +264,20 @@ public class ADNameSampleStream implemen
 
     if (leftContractionPart != null) {
       // will handle the contraction
-      String tag = leaf.getSecondaryTag();
       String right = leaf.getLexeme();
-      if (tag != null && tag.contains("<-sam>")) {
-        right = leaf.getLexeme();
-        String c = PortugueseContractionUtility.toContraction(leftContractionPart, right);
-
-        if (c != null) {
-          String[] parts = whitespacePattern.split(c);
-          sentence.addAll(Arrays.asList(parts));
-        } else {
-          // contraction was missing!
-          sentence.add(leftContractionPart);
-          sentence.add(right);
-        }
 
+      String c = PortugueseContractionUtility.toContraction(
+          leftContractionPart, right);
+      if (c != null) {
+        String[] parts = whitespacePattern.split(c);
+        sentence.addAll(Arrays.asList(parts));
+        alreadyAdded = true;
       } else {
-        // could not match contraction !
+        // contraction was missing! why?
+        sentence.add(leftContractionPart);
+        // keep alreadyAdded false.
       }
       leftContractionPart = null;
-      alreadyAdded = true;
     }
 
       String namedEntityTag = null;