You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/10/14 11:10:25 UTC

svn commit: r1183249 - in /incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor: ./ namefinder/

Author: joern
Date: Fri Oct 14 09:10:25 2011
New Revision: 1183249

URL: http://svn.apache.org/viewvc?rev=1183249&view=rev
Log:
OPENNLP-310 Added an option to enable / disable confirmed name recall boosting

Added:
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java   (with props)
Modified:
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
    incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java Fri Oct 14 09:10:25 2011
@@ -19,16 +19,22 @@ package org.apache.opennlp.caseditor;
 
 public class OpenNLPPreferenceConstants {
 
-  public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
-  public static final String NAME_FINDER_MODEL_PATH = OpenNLPPlugin.ID + ".NAME_FINDER_MODEL_PATH";
+  // General settings
+  public static final String SENTENCE_TYPE = OpenNLPPlugin.ID + ".SENTENCE_TYPE";
+  public static final String TOKEN_TYPE = OpenNLPPlugin.ID + ".TOKEN_TYPE";
   
+  // Sentence detector
   public static final String PARAGRAPH_TYPE = OpenNLPPlugin.ID + ".PARAGRAPH_TYPE";
-  public static final String SENTENCE_TYPE = OpenNLPPlugin.ID + ".SENTENCE_TYPE";
+  public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
+  
+  // Name Finder  
   public static final String ADDITIONAL_SENTENCE_TYPE = OpenNLPPlugin.ID + ".ADDITIONAL_SENTENCE_TYPE";
-  public static final String TOKEN_TYPE = OpenNLPPlugin.ID + ".TOKEN_TYPE";
   public static final String NAME_TYPE = OpenNLPPlugin.ID + ".NAME_TYPE";
+  public static final String NAME_FINDER_MODEL_PATH = OpenNLPPlugin.ID + ".NAME_FINDER_MODEL_PATH";
+  public static final String ENABLE_CONFIRMED_NAME_DETECTION = OpenNLPPlugin.ID + ".ENABLE_RECALL_BOOSTING";
+
+  // Tokenizer
   public static final String TOKENIZER_MODEL_PATH = OpenNLPPlugin.ID + ".TOKENIZER_MODEL_PATH";
-  
   public static final String TOKENIZER_ALGO_STATISTICAL = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_STATISTICAL";
   public static final String TOKENIZER_ALGO_WHITESPACE = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_WHITESPACE";
   public static final String TOKENIZER_ALGO_SIMPLE = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_SIMPLE";

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Fri Oct 14 09:10:25 2011
@@ -232,6 +232,83 @@ public class EntityContentProvider imple
     }
   }
   
+  private class NameFinderJobListener extends JobChangeAdapter {
+    public void done(final IJobChangeEvent event) {
+      
+      Display.getDefault().asyncExec(new Runnable() {
+        
+        @Override
+        public void run() {
+          
+          // TODO: Check if the view is still available; this might be called after the view is disposed.
+          
+          IStatus status = event.getResult();
+          
+          if (status.isOK()) {
+            EntityContentProvider.this.nameFinderView.setMessage(null);
+            
+            List<Entity> detectedEntities = EntityContentProvider.this.nameFinder.getNames();
+            
+            // Remove all detected entities from the last run which are not detected anymore
+            for (Iterator<Entity> it = candidateEntities.iterator(); it.hasNext();) {
+              Entity entity = it.next();
+              if (searchEntity(detectedEntities, entity.getBeginIndex(),
+                  entity.getEndIndex(), entity.getType()) == null)  {
+                
+                // TODO: Create an array of entities that should be removed, much faster ...
+                EntityContentProvider.this.entityListViewer.remove(entity);
+                
+                // Can safely be removed, since it can only be an un-confirmed entity
+                it.remove();
+              }
+            }
+            
+            // Update the entity if it already exists, otherwise add it
+            for (Entity detectedEntity : detectedEntities) {
+              
+              // Bug: 
+              // There can be multiple entities in this span!
+              // In this case we want to keep the first, update it, and discard the others!
+              
+              // Case: One entity spanning two tokens replaces 
+              
+              Entity entity = searchEntity(candidateEntities, detectedEntity.getBeginIndex(),
+                  detectedEntity.getEndIndex(), detectedEntity.getType());
+              
+              // An entity already exists: if confirmed, update only its confidence score; otherwise update all fields
+              if (entity != null) {
+                if (entity.isConfirmed()) {
+                  entity.setConfidence(detectedEntity.getConfidence());
+                  EntityContentProvider.this.entityListViewer.refresh(entity);
+                  continue;
+                }
+                else {
+                  entity.setBeginIndex(detectedEntity.getBeginIndex());
+                  entity.setEndIndex(detectedEntity.getEndIndex());
+                  entity.setEntityText(detectedEntity.getEntityText());
+                  entity.setConfidence(detectedEntity.getConfidence());
+                  
+                  EntityContentProvider.this.entityListViewer.refresh(entity);
+                }
+              }
+              else {
+                // Only add if it is not a confirmed entity!
+                if (searchEntity(confirmedEntities, detectedEntity.getBeginIndex(),
+                  detectedEntity.getEndIndex(), detectedEntity.getType()) == null) {
+                  EntityContentProvider.this.entityListViewer.add(detectedEntity);
+                  candidateEntities.add(detectedEntity);
+                }
+              }
+            }
+          }
+          else {
+            EntityContentProvider.this.nameFinderView.setMessage(status.getMessage());
+          }
+        }
+      });
+    };
+  }
+  
   private NameFinderJob nameFinder;
   
   private CasChangeNameFinderTrigger casChangeTrigger = new CasChangeNameFinderTrigger();
@@ -341,82 +418,7 @@ public class EntityContentProvider imple
       }
     }
     
-    nameFinder.addJobChangeListener(new JobChangeAdapter() {
-      public void done(final IJobChangeEvent event) {
-        
-        Display.getDefault().asyncExec(new Runnable() {
-          
-          @Override
-          public void run() {
-            
-            // TODO: Check if view is still available, that might be called after view is disposed.
-            
-            IStatus status = event.getResult();
-            
-            if (status.isOK()) {
-              EntityContentProvider.this.nameFinderView.setMessage(null);
-              
-              List<Entity> detectedEntities = EntityContentProvider.this.nameFinder.getNames();
-              
-              // Remove all detected entities from the last run which are not detected anymore
-              for (Iterator<Entity> it = candidateEntities.iterator(); it.hasNext();) {
-                Entity entity = it.next();
-                if (searchEntity(detectedEntities, entity.getBeginIndex(),
-                    entity.getEndIndex(), entity.getType()) == null)  {
-                  
-                  // TODO: Create an array of entities that should be removed, much faster ...
-                  EntityContentProvider.this.entityListViewer.remove(entity);
-                  
-                  // Can safely be removed, since it can only be an un-confirmed entity
-                  it.remove();
-                }
-              }
-              
-              // Update if entity already exist, or add it
-              for (Entity detectedEntity : detectedEntities) {
-                
-                // Bug: 
-                // There can be multiple entities in this span!
-                // In this case we want to keep the first, update it, and discard the others!
-                
-                // Case: One entity spanning two tokens replaces 
-                
-                Entity entity = searchEntity(candidateEntities, detectedEntity.getBeginIndex(),
-                    detectedEntity.getEndIndex(), detectedEntity.getType());
-                
-                // A confirmed entity already exists, update its confidence score
-                if (entity != null) {
-                  if (entity.isConfirmed()) {
-                    entity.setConfidence(detectedEntity.getConfidence());
-                    EntityContentProvider.this.entityListViewer.refresh(entity);
-                    continue;
-                  }
-                  else {
-                    entity.setBeginIndex(detectedEntity.getBeginIndex());
-                    entity.setEndIndex(detectedEntity.getEndIndex());
-                    entity.setEntityText(detectedEntity.getEntityText());
-                    entity.setConfidence(detectedEntity.getConfidence());
-                    
-                    EntityContentProvider.this.entityListViewer.refresh(entity);
-                  }
-                }
-                else {
-                  // Only add if it is not a confirmed entity!
-                  if (searchEntity(confirmedEntities, detectedEntity.getBeginIndex(),
-                    detectedEntity.getEndIndex(), detectedEntity.getType()) == null) {
-                    EntityContentProvider.this.entityListViewer.add(detectedEntity);
-                    candidateEntities.add(detectedEntity);
-                  }
-                }
-              }
-            }
-            else {
-              EntityContentProvider.this.nameFinderView.setMessage(status.getMessage());
-            }
-          }
-        });
-      };
-    });
+    nameFinder.addJobChangeListener(new NameFinderJobListener());
     
     String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
     
@@ -540,13 +542,22 @@ public class EntityContentProvider imple
       }
       
       nameFinder.setTokens(tokens.toArray(new Span[tokens.size()]));
-      nameFinder.setVerifiedNames(nameSpans.toArray(new Span[nameSpans.size()]));
       nameFinder.setModelPath(modelPathes, nameTypeNames);
       
       if (!nameFinder.isSystem()) {
         nameFinder.setSystem(true);
       }
       
+      boolean isRecallBoostingEnabled = 
+          store.getBoolean(OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION);
+      
+      if (isRecallBoostingEnabled) {
+        nameFinder.setVerifiedNames(nameSpans.toArray(new Span[nameSpans.size()]));
+      }
+      else {
+        nameFinder.setVerifiedNames(null);
+      }
+      
       nameFinder.schedule();
     }
   }

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Fri Oct 14 09:10:25 2011
@@ -131,44 +131,42 @@ public class NameFinderJob extends Job {
         
         // Note: This is slow!
         // iterate over names, to find token indexes
-        
-        // TODO: This must work with multiple types ...
-        for (Span verifiedName : verifiedNames) {
-          boolean isStart = true;
-        	
-          for (int i = 0; i < sentenceTokens.size(); i++) {
-            if (verifiedName.contains(sentenceTokens.get(i))) {
-              
-              String outcome;
-              
-              // Need better mechanism here, first token in entity should be start!
-              if (isStart) {
-                outcome = NameFinderME.START;
-                isStart = false;
-              }
-              else {
-                outcome = NameFinderME.CONTINUE;
-              }
-              
-              
-              // TODO: Overlapping names are dangerous here!
-              
-              // TODO: We could use type information here ... 
-              // as part of the outcome!
-              verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);
-              
-              // TODO: Do not put stop word
-              // Only put, if char length is two
-              // Only put only letters in token
-              StringPattern pattern = StringPattern.recognize(tokenStrings[i]);
-              
-              if (pattern.isAllLetter() && tokenStrings[i].length() > 1) {
-            	  nameTokens.add(verifiedName.getType() + "-" + tokenStrings[i]);
+        if (verifiedNames != null) {
+          for (Span verifiedName : verifiedNames) {
+            boolean isStart = true;
+          	
+            for (int i = 0; i < sentenceTokens.size(); i++) {
+              if (verifiedName.contains(sentenceTokens.get(i))) {
+                
+                String outcome;
+                
+                // Need better mechanism here, first token in entity should be start!
+                if (isStart) {
+                  outcome = NameFinderME.START;
+                  isStart = false;
+                }
+                else {
+                  outcome = NameFinderME.CONTINUE;
+                }
+                
+                // TODO: Overlapping names are dangerous here!
+                
+                // TODO: We could use type information here ... 
+                // as part of the outcome!
+                verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);
+                
+                // TODO: Do not put stop words
+                // Only put if the token is at least two characters long
+                // and consists only of letters
+                StringPattern pattern = StringPattern.recognize(tokenStrings[i]);
+                
+                if (pattern.isAllLetter() && tokenStrings[i].length() > 1) {
+              	  nameTokens.add(verifiedName.getType() + "-" + tokenStrings[i]);
+                }
               }
             }
           }
         }
-        
         nameFinder.setRestriction(verifiedNameTokens);
         nameFinder.setNameOnlyTokens(nameTokens);
         

Added: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java?rev=1183249&view=auto
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java (added)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java Fri Oct 14 09:10:25 2011
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.caseditor.namefinder;
+
+import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.eclipse.jface.preference.IPreferenceStore;
+
+public class NameFinderPreferenceInitializer {
+
+  static void initializeDefaultPreferences(IPreferenceStore store) {
+    store.setDefault(OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION, true);
+  }
+}

Propchange: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java Fri Oct 14 09:10:25 2011
@@ -17,8 +17,8 @@
 
 package org.apache.opennlp.caseditor.namefinder;
 
-import org.apache.opennlp.caseditor.OpenNLPPlugin;
 import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.eclipse.jface.preference.BooleanFieldEditor;
 import org.eclipse.jface.preference.FieldEditorPreferencePage;
 import org.eclipse.jface.preference.StringFieldEditor;
 import org.eclipse.ui.IWorkbench;
@@ -28,7 +28,6 @@ public class NameFinderPreferencePage ex
     implements IWorkbenchPreferencePage {
 
   public NameFinderPreferencePage() {
-//    setPreferenceStore();
     setDescription("Name Finder Preferences.");
   }
 
@@ -52,5 +51,10 @@ public class NameFinderPreferencePage ex
         OpenNLPPreferenceConstants.NAME_TYPE,
         "Name Type(s)", getFieldEditorParent());
     addField(nameType);
+    
+    BooleanFieldEditor enableRecallBoosting = new BooleanFieldEditor(
+        OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION,
+        "Enable the detection of confirmed names", getFieldEditorParent());
+    addField(enableRecallBoosting);
   }
 }

Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java Fri Oct 14 09:10:25 2011
@@ -27,6 +27,7 @@ import org.apache.uima.caseditor.editor.
 import org.apache.uima.caseditor.editor.util.AnnotationSelection;
 import org.eclipse.jface.action.IAction;
 import org.eclipse.jface.action.IToolBarManager;
+import org.eclipse.jface.preference.IPreferenceStore;
 import org.eclipse.jface.viewers.ISelection;
 import org.eclipse.jface.viewers.ISelectionChangedListener;
 import org.eclipse.jface.viewers.SelectionChangedEvent;
@@ -65,6 +66,9 @@ class NameFinderViewPage extends Page im
 
   NameFinderViewPage(ICasEditor editor, ICasDocument document) {
     this.editor = editor;
+    
+    IPreferenceStore store = editor.getCasDocumentProvider().getTypeSystemPreferenceStore(editor.getEditorInput());
+    NameFinderPreferenceInitializer.initializeDefaultPreferences(store);
   }
 
   public void createControl(Composite parent) {