You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/12/06 14:34:00 UTC

svn commit: r1210904 - in /incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder: EntityContentProvider.java NameFinderJob.java

Author: joern
Date: Tue Dec  6 13:34:00 2011
New Revision: 1210904

URL: http://svn.apache.org/viewvc?rev=1210904&view=rev
Log:
OPENNLP-324 Added configuration options and added initial capital letter filter.

Modified:
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1210904&r1=1210903&r2=1210904&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Tue Dec  6 13:34:00 2011
@@ -50,6 +50,10 @@ import org.eclipse.jface.viewers.TableVi
 import org.eclipse.jface.viewers.Viewer;
 import org.eclipse.swt.widgets.Display;
 
+/**
+ * The EntityContentProvider is responsible for triggering the detection of
+ * entities and turning these into potential entity annotations.
+ */
 // Need its own list (or map), otherwise it is complicated to compute updates ...
 // Maybe we should create again, a "View" map of indexes to its annotations?!
 public class EntityContentProvider implements IStructuredContentProvider {
@@ -97,7 +101,7 @@ public class EntityContentProvider imple
         
         // Remove all entities from the view and candidate list
         // TODO: Refactor this code branch ...
-        //       Now it only needs to remove all intersecting entites from the
+        //       Now it only needs to remove all intersecting entities from the
         //       candidate list and add the entity itself to the confirmed list
         
         int selectionIndex = EntityContentProvider.this.entityListViewer.
@@ -108,7 +112,6 @@ public class EntityContentProvider imple
           entity.setBeginIndex(annotation.getBegin());
           entity.setEndIndex(annotation.getEnd());
           entity.setEntityText(annotation.getCoveredText());
-//          entity.setConfirmed(true);
           entity.setConfidence(null);
           
           entityListViewer.remove(entity);
@@ -340,8 +343,11 @@ public class EntityContentProvider imple
   
   void runNameFinder() {
     
-    IPreferenceStore store = editor.getCasDocumentProvider().getTypeSystemPreferenceStore(editor.getEditorInput());
+    // TODO: Check if sentences do overlap
+    // TODO: Check if tokens do overlap
+    // TODO: Check that tokens do not intersect with sentence span
     
+    IPreferenceStore store = editor.getCasDocumentProvider().getTypeSystemPreferenceStore(editor.getEditorInput());
     
     // TODO: All preferences should be retrieved when the name finder executed!
     // Just move it down the run method ...
@@ -521,6 +527,15 @@ public class EntityContentProvider imple
         nameFinder.setVerifiedNames(null);
       }
       
+      nameFinder.setIgnoreShortTokens(store.getBoolean(
+          OpenNLPPreferenceConstants.IGNORE_SHORT_TOKENS));
+      
+      nameFinder.setOnlyConsiderAllLetterTokens(store.getBoolean(
+          OpenNLPPreferenceConstants.ONLY_CONSIDER_ALL_LETTER_TOKENS));
+      
+      nameFinder.setOnlyConsiderInitialCapitalLetterTokens(store.getBoolean(
+          OpenNLPPreferenceConstants.ONLY_CONSIDER_INITIAL_CAPITAL_TOKENS));
+      
       nameFinder.schedule();
     }
   }

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1210904&r1=1210903&r2=1210904&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Tue Dec  6 13:34:00 2011
@@ -54,6 +54,12 @@ public class NameFinderJob extends Job {
   private Span verifiedNames[] = new Span[0];
   
   private List<PotentialAnnotation> nameList;
+
+  private boolean ignoreShortTokens;
+
+  private boolean onlyConsiderAllLetterTokens;
+
+  private boolean onlyConsiderInitialLetterTokens;
   
   NameFinderJob() {
     super("Name Finder Job");
@@ -83,6 +89,18 @@ public class NameFinderJob extends Job {
     this.verifiedNames = verifiedNames;
   }
 
+  synchronized void setIgnoreShortTokens(boolean ignoreShortTokens) {
+    this.ignoreShortTokens = ignoreShortTokens;
+  }
+  
+  synchronized void setOnlyConsiderAllLetterTokens(boolean onlyConsiderAllLetterTokens) {
+    this.onlyConsiderAllLetterTokens = onlyConsiderAllLetterTokens; 
+  }
+  
+  synchronized void setOnlyConsiderInitialCapitalLetterTokens(boolean onlyConsiderInitialLetterTokens) {
+    this.onlyConsiderInitialLetterTokens = onlyConsiderInitialLetterTokens;
+  }
+  
   // maybe report result, through an interface?!
   // Note: Concurrency issue ... here! Editor might already be closed after model is loaded!
   // The job change listener in the Entity Content Provider must handle that!
@@ -156,12 +174,21 @@ public class NameFinderJob extends Job {
                 // as part of the outcome!
                 verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);
                 
-                // TODO: Do not put stop word
-                // Only put, if char length is two
-                // Only put only letters in token
                 StringPattern pattern = StringPattern.recognize(tokenStrings[i]);
                 
-                if (pattern.isAllLetter() && tokenStrings[i].length() > 1) {
+                boolean useToken = true;
+                
+                if (ignoreShortTokens && tokenStrings[i].length() < 4) {
+                  useToken = false;
+                }
+                else if (onlyConsiderAllLetterTokens && !pattern.isAllLetter()) {
+                  useToken = false;
+                }
+                else if (onlyConsiderInitialLetterTokens && !pattern.isInitialCapitalLetter()) {
+                  useToken = false;
+                }
+                  
+                if (useToken) {
               	  nameTokens.add(verifiedName.getType() + "-" + tokenStrings[i]);
                 }
               }