You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/07/19 22:19:32 UTC

svn commit: r1148511 - in /incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder: EntityContentProvider.java NameFinderJob.java NameFinderViewPage.java

Author: joern
Date: Tue Jul 19 20:19:31 2011
New Revision: 1148511

URL: http://svn.apache.org/viewvc?rev=1148511&view=rev
Log:
OPENNLP-235 Added the name finder to the content provider. Now it runs once and provides the table with a list of names.

Added:
    incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java   (with props)
Modified:
    incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
    incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java

Modified: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1148511&r1=1148510&r2=1148511&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Tue Jul 19 20:19:31 2011
@@ -17,23 +17,139 @@
 
 package org.apache.opennlp.caseditor.namefinder;
 
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.util.Span;
+
+import org.apache.opennlp.caseditor.OpenNLPPlugin;
+import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.caseditor.editor.ICasDocument;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.jobs.IJobChangeEvent;
+import org.eclipse.core.runtime.jobs.JobChangeAdapter;
+import org.eclipse.jface.preference.IPreferenceStore;
 import org.eclipse.jface.viewers.IStructuredContentProvider;
 import org.eclipse.jface.viewers.TableViewer;
 import org.eclipse.jface.viewers.Viewer;
+import org.eclipse.swt.widgets.Display;
 
 public class EntityContentProvider implements IStructuredContentProvider {
 
+  private NameFinderJob nameFinder;
+  
   private TableViewer entityList;
   
-  EntityContentProvider(TableViewer entityList) {
+  private ICasDocument input;
+  
+  /** Adds the names of every successful name finder run to the table, on the UI thread. */
+  EntityContentProvider(NameFinderJob nameFinder, TableViewer entityList) {
+    this.nameFinder = nameFinder;
     this.entityList = entityList;
+    nameFinder.addJobChangeListener(new JobChangeAdapter() {
+      @Override
+      public void done(final IJobChangeEvent event) {
+        Display.getDefault().asyncExec(new Runnable() {
+          @Override
+          public void run() {
+            IStatus status = event.getResult();
+            if (!status.isOK()
+                || EntityContentProvider.this.entityList.getControl().isDisposed()) {
+              return; // failed run, or the view was closed before this runnable got executed
+            }
+            List<Entity> potentialEntities = EntityContentProvider.this.nameFinder.getNames();
+            EntityContentProvider.this.entityList.add(potentialEntities.toArray());
+          }
+        });
+      }
+    });
   }
+  
   public void inputChanged(Viewer viewer, Object oldInput, Object newInput) {
+    input = (ICasDocument) newInput; // the document whose text is searched for names
+    if (input != null) { // JFace passes a null input when the viewer is disposed
+      runNameFinder();
+    }
+  }
+  
+  /**
+   * Collects the sentence and token spans of the current document and schedules
+   * the name finder job with them.
+   * <p>
+   * Sentences are taken from the annotations of the sentence type configured in the
+   * preference store, tokens are created by running the {@link SimpleTokenizer} on
+   * the text of each sentence.
+   * <p>
+   * Nothing is scheduled if there is no document, if the document has no text or if
+   * the configured sentence type does not exist in the type system.
+   */
+  void runNameFinder() {
+    if (input == null) {
+      // JFace passes a null input to inputChanged when the viewer is disposed
+      return;
+    }
+
+    IPreferenceStore store = OpenNLPPlugin.getDefault().getPreferenceStore();
+    String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
+
+    CAS cas = input.getCAS();
+    String text = cas.getDocumentText();
+
+    if (text == null) {
+      return;
+    }
+
+    Type sentenceType = cas.getTypeSystem().getType(sentenceTypeName);
+
+    if (sentenceType == null) {
+      // The lookup returns null for an unknown type name and the annotation index
+      // cannot be retrieved without a type.
+      // TODO: Should have a good way to display an error when the type is incorrect ...
+      return;
+    }
+
+    List<Span> sentences = new ArrayList<Span>();
+    List<Span> tokens = new ArrayList<Span>();
+
+    FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(sentenceType);
+
+    for (Iterator<AnnotationFS> sentenceIterator = sentenceAnnotations.iterator();
+        sentenceIterator.hasNext();) {
+
+      AnnotationFS sentenceAnnotation = sentenceIterator.next();
+      int sentenceOffset = sentenceAnnotation.getBegin();
+
+      sentences.add(new Span(sentenceOffset, sentenceAnnotation.getEnd()));
+
+      // The tokenizer returns offsets relative to the sentence text, the sentence
+      // begin is added to turn them into offsets into the document text.
+      Span tokenSpans[] = SimpleTokenizer.INSTANCE.tokenizePos(sentenceAnnotation.getCoveredText());
+
+      for (Span token : tokenSpans) {
+        tokens.add(new Span(sentenceOffset + token.getStart(),
+            sentenceOffset + token.getEnd()));
+      }
+    }
+
+    // TODO: Get a list of name annotations, they will force the
+    // name finder to detect them ... and maybe maintain a negative list
+
+    // This will cause issues when it is done while it is running!
+    nameFinder.setText(text);
+    nameFinder.setSentences(sentences.toArray(new Span[sentences.size()]));
+    nameFinder.setTokens(tokens.toArray(new Span[tokens.size()]));
+
+    nameFinder.schedule();
   }
   
   public Object[] getElements(Object inputElement) {
-    // return test element
-    return new Entity[] {new Entity(0, 5, "test", 0d)};
+    return new Entity[] {}; // no initial elements, names are added to the table when the job is done
   }
   
   public void dispose() {

Added: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1148511&view=auto
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (added)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Tue Jul 19 20:19:31 2011
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.caseditor.namefinder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.Span;
+
+import org.apache.opennlp.caseditor.OpenNLPPlugin;
+import org.eclipse.core.runtime.IProgressMonitor;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.Status;
+import org.eclipse.core.runtime.jobs.Job;
+
+// Add error handling, if something goes wrong, an error should be reported!
+// Need a rule, only one name finder job at a time ...
+// don't change setting, while job is running!
+public class NameFinderJob extends Job {
+  
+  private NameFinderME nameFinder;
+  
+  private String modelPath;
+  private String text;
+  private Span sentences[];
+  private Span tokens[];
+  
+  private List<Entity> nameList;
+  
+  NameFinderJob() {
+    super("Name Finder Job");
+  }
+  
+  synchronized void setModelPath(String modelPath) {
+    this.modelPath = modelPath;
+  }
+  
+  synchronized void setText(String text) {
+    this.text = text;
+  }
+  
+  synchronized void setSentences(Span sentences[]) {
+    this.sentences = sentences;
+  }
+  
+  synchronized void setTokens(Span tokens[]) {
+    this.tokens = tokens;
+  }
+
+  // maybe report result, through an interface?!
+  @Override
+  protected IStatus run(IProgressMonitor monitor) {
+
+    // lazy load model on first run ...
+    if (nameFinder == null) {
+      InputStream modelIn = null;
+      try {
+        modelIn = NameFinderViewPage.class
+            .getResourceAsStream("/en-ner-per.bin");
+        TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
+        nameFinder = new NameFinderME(model, null, 5);
+      } catch (IOException e) {
+        e.printStackTrace();
+      } finally {
+        if (modelIn != null) {
+          try {
+            modelIn.close();
+          } catch (IOException e) {
+          }
+        }
+      }
+    }
+
+    if (nameFinder != null) {
+      nameFinder.clearAdaptiveData();
+    
+      nameList = new ArrayList<Entity>();
+      
+      for (Span sentence : sentences) {
+        
+        // Create token list for sentence
+        List<Span> sentenceTokens = new ArrayList<Span>();
+        
+        for (Span token : tokens) {
+          if (sentence.contains(token)) {
+            sentenceTokens.add(token);
+          }
+        }
+        
+        String tokenStrings[] = new String[sentenceTokens.size()];
+        
+        for (int i = 0; i < sentenceTokens.size(); i++) {
+          Span token = sentenceTokens.get(i);
+          tokenStrings[i] = token.getCoveredText(text).toString();
+        }
+        
+        Span names[] = nameFinder.find(tokenStrings);
+        double nameProbs[] = nameFinder.probs(names);
+        
+        for (int i = 0; i < names.length; i++) {
+          
+          // add sentence offset here ...
+          
+          int beginIndex = sentenceTokens.get(names[i].getStart()).getStart();
+          int endIndex = sentenceTokens.get(names[i].getEnd() - 1).getEnd();
+          
+          String coveredText = text.substring(beginIndex, endIndex);
+          
+          
+          nameList.add(new Entity(beginIndex, endIndex, coveredText, nameProbs[i]));
+        }
+      }
+    }
+    
+    // TODO: If there is a problem return an error status,
+    // and calling client can fetch error message via method call
+    // Use OpenNLPPlugin to log errors ...
+    return new Status(IStatus.OK, OpenNLPPlugin.ID, "OK");
+  }
+
+  public List<Entity> getNames() {
+    List<Entity> names = new ArrayList<Entity>();
+    names.addAll(nameList);
+    return names;
+  }
+}

Propchange: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java?rev=1148511&r1=1148510&r2=1148511&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java (original)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java Tue Jul 19 20:19:31 2011
@@ -59,7 +59,7 @@ class NameFinderViewPage extends Page im
     entityColumn.setWidth(135);
     
     entityList.setLabelProvider(new EntityLabelProvider());
-    entityList.setContentProvider(new EntityContentProvider(entityList));
+    entityList.setContentProvider(new EntityContentProvider(new NameFinderJob(), entityList));
     getSite().setSelectionProvider(entityList);
     
     entityList.setInput(editor.getDocument());