You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/07/19 22:19:32 UTC
svn commit: r1148511 - in
/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder:
EntityContentProvider.java NameFinderJob.java NameFinderViewPage.java
Author: joern
Date: Tue Jul 19 20:19:31 2011
New Revision: 1148511
URL: http://svn.apache.org/viewvc?rev=1148511&view=rev
Log:
OPENNLP-235 Added the name finder to the content provider. Now it runs once and provides the table with a list of names.
Added:
incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (with props)
Modified:
incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
Modified: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1148511&r1=1148510&r2=1148511&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Tue Jul 19 20:19:31 2011
@@ -17,23 +17,139 @@
package org.apache.opennlp.caseditor.namefinder;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.util.Span;
+
+import org.apache.opennlp.caseditor.OpenNLPPlugin;
+import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.caseditor.editor.ICasDocument;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.jobs.IJobChangeEvent;
+import org.eclipse.core.runtime.jobs.JobChangeAdapter;
+import org.eclipse.jface.preference.IPreferenceStore;
import org.eclipse.jface.viewers.IStructuredContentProvider;
import org.eclipse.jface.viewers.TableViewer;
import org.eclipse.jface.viewers.Viewer;
+import org.eclipse.swt.widgets.Display;
public class EntityContentProvider implements IStructuredContentProvider {
+ private NameFinderJob nameFinder;
+
private TableViewer entityList;
- EntityContentProvider(TableViewer entityList) {
+ private ICasDocument input;
+
+ /**
+  * Creates a content provider which adds the names detected by the given
+  * name finder job to the given table.
+  * <p>
+  * A listener is registered on the job; every time the job finished with
+  * an OK status the detected names are appended to the table.
+  *
+  * @param nameFinder the job which detects the names
+  * @param entityList the table viewer which displays the detected names
+  */
+ EntityContentProvider(NameFinderJob nameFinder, TableViewer entityList) {
+   this.nameFinder = nameFinder;
this.entityList = entityList;
+
+   nameFinder.addJobChangeListener(new JobChangeAdapter() {
+     @Override
+     public void done(final IJobChangeEvent event) {
+
+       // The table must be updated through the display, post the update to it
+       Display.getDefault().asyncExec(new Runnable() {
+
+         @Override
+         public void run() {
+           IStatus status = event.getResult();
+
+           if (status == null || !status.isOK()) {
+             return;
+           }
+
+           // The runnable is executed some time after the job finished, the view
+           // could have been closed in between, adding to a disposed table fails
+           if (EntityContentProvider.this.entityList.getControl().isDisposed()) {
+             return;
+           }
+
+           List<Entity> potentialEntities = EntityContentProvider.this.nameFinder.getNames();
+           EntityContentProvider.this.entityList.add(potentialEntities.toArray());
+         }
+       });
+     }
+   });
}
+
public void inputChanged(Viewer viewer, Object oldInput, Object newInput) {
+
+ input = (ICasDocument) newInput;
+
+ runNameFinder();
+ }
+
+ /**
+  * Collects the sentence and token spans of the current input document
+  * and schedules the name finder job with them.
+  * <p>
+  * The sentences are the annotations of the sentence type configured in the
+  * preference store, the tokens are created by running the
+  * {@link SimpleTokenizer} on the text of each sentence. The job is not
+  * scheduled if there is no input document, if the document has no text or
+  * if the configured sentence type is not part of the type system.
+  */
+ void runNameFinder() {
+
+   // Without an input document there is nothing to process
+   if (input == null) {
+     return;
+   }
+
+   IPreferenceStore store = OpenNLPPlugin.getDefault().getPreferenceStore();
+   String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
+   String nameTypeName = store.getString(OpenNLPPreferenceConstants.NAME_TYPE);
+
+   CAS cas = input.getCAS();
+
+   String text = cas.getDocumentText();
+
+   if (text == null) {
+     return;
+   }
+
+   // just get it from preference store?!
+   // TODO: Should have a good way to display an error when the type is incorrect ...
+   Type sentenceType = cas.getTypeSystem().getType(sentenceTypeName);
+
+   // The type is null if the configured name is not defined in the type system,
+   // the annotation index cannot be retrieved for it
+   if (sentenceType == null) {
+     return;
+   }
+
+   List<Span> sentences = new ArrayList<Span>();
+   List<Span> tokens = new ArrayList<Span>();
+
+   FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(sentenceType);
+
+   for (Iterator<AnnotationFS> sentenceIterator = sentenceAnnotations.iterator();
+       sentenceIterator.hasNext();) {
+
+     AnnotationFS sentenceAnnotation = sentenceIterator.next();
+
+     int sentenceOffset = sentenceAnnotation.getBegin();
+
+     sentences.add(new Span(sentenceOffset, sentenceAnnotation.getEnd()));
+
+     Span tokenSpans[] = SimpleTokenizer.INSTANCE.tokenizePos(
+         sentenceAnnotation.getCoveredText());
+
+     // The tokenizer spans are relative to the sentence text,
+     // shift them so they are relative to the document text
+     for (Span token : tokenSpans) {
+       tokens.add(new Span(sentenceOffset + token.getStart(),
+           sentenceOffset + token.getEnd()));
+     }
+   }
+
+   // get a list of name annotations, they will force the
+   // name finder to detect them ... and maybe maintain a negative list
+   // TODO: The name spans are collected, but not yet passed to the name finder job
+   List<Span> nameSpans = new ArrayList<Span>();
+
+   Type nameType = cas.getTypeSystem().getType(nameTypeName);
+
+   // A missing name type must not prevent the name finder from running
+   if (nameType != null) {
+     FSIndex<AnnotationFS> nameAnnotations = cas.getAnnotationIndex(nameType);
+
+     for (Iterator<AnnotationFS> nameIterator = nameAnnotations.iterator();
+         nameIterator.hasNext();) {
+
+       AnnotationFS nameAnnotation = nameIterator.next();
+
+       nameSpans.add(new Span(nameAnnotation.getBegin(), nameAnnotation.getEnd()));
+     }
+   }
+
+   // This will cause issues when it is done while the job is running!
+   nameFinder.setText(text);
+   nameFinder.setSentences(sentences.toArray(new Span[sentences.size()]));
+   nameFinder.setTokens(tokens.toArray(new Span[tokens.size()]));
+
+   nameFinder.schedule();
}
public Object[] getElements(Object inputElement) {
- // return test element
- return new Entity[] {new Entity(0, 5, "test", 0d)};
+ return new Entity[] {};
}
public void dispose() {
Added: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1148511&view=auto
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (added)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Tue Jul 19 20:19:31 2011
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.caseditor.namefinder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.Span;
+
+import org.apache.opennlp.caseditor.OpenNLPPlugin;
+import org.eclipse.core.runtime.IProgressMonitor;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.Status;
+import org.eclipse.core.runtime.jobs.Job;
+
+// Add error handling, if something goes wrong, an error should be reported!
+// Need a rule, only one name finder job at a time ...
+// don't change setting, while job is running!
+public class NameFinderJob extends Job {
+
+ private NameFinderME nameFinder;
+
+ private String modelPath;
+ private String text;
+ private Span sentences[];
+ private Span tokens[];
+
+ private List<Entity> nameList;
+
+ NameFinderJob() {
+ super("Name Finder Job");
+ }
+
+ synchronized void setModelPath(String modelPath) {
+ this.modelPath = modelPath;
+ }
+
+ synchronized void setText(String text) {
+ this.text = text;
+ }
+
+ synchronized void setSentences(Span sentences[]) {
+ this.sentences = sentences;
+ }
+
+ synchronized void setTokens(Span tokens[]) {
+ this.tokens = tokens;
+ }
+
+ // maybe report result, through an interface?!
+ @Override
+ protected IStatus run(IProgressMonitor monitor) {
+
+ // lazy load model on first run ...
+ if (nameFinder == null) {
+ InputStream modelIn = null;
+ try {
+ modelIn = NameFinderViewPage.class
+ .getResourceAsStream("/en-ner-per.bin");
+ TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
+ nameFinder = new NameFinderME(model, null, 5);
+ } catch (IOException e) {
+ e.printStackTrace();
+ } finally {
+ if (modelIn != null) {
+ try {
+ modelIn.close();
+ } catch (IOException e) {
+ }
+ }
+ }
+ }
+
+ if (nameFinder != null) {
+ nameFinder.clearAdaptiveData();
+
+ nameList = new ArrayList<Entity>();
+
+ for (Span sentence : sentences) {
+
+ // Create token list for sentence
+ List<Span> sentenceTokens = new ArrayList<Span>();
+
+ for (Span token : tokens) {
+ if (sentence.contains(token)) {
+ sentenceTokens.add(token);
+ }
+ }
+
+ String tokenStrings[] = new String[sentenceTokens.size()];
+
+ for (int i = 0; i < sentenceTokens.size(); i++) {
+ Span token = sentenceTokens.get(i);
+ tokenStrings[i] = token.getCoveredText(text).toString();
+ }
+
+ Span names[] = nameFinder.find(tokenStrings);
+ double nameProbs[] = nameFinder.probs(names);
+
+ for (int i = 0; i < names.length; i++) {
+
+ // add sentence offset here ...
+
+ int beginIndex = sentenceTokens.get(names[i].getStart()).getStart();
+ int endIndex = sentenceTokens.get(names[i].getEnd() - 1).getEnd();
+
+ String coveredText = text.substring(beginIndex, endIndex);
+
+
+ nameList.add(new Entity(beginIndex, endIndex, coveredText, nameProbs[i]));
+ }
+ }
+ }
+
+ // TODO: If there is a problem return an error status,
+ // and calling client can fetch error message via method call
+ // Use OpenNLPPlugin to log errors ...
+ return new Status(IStatus.OK, OpenNLPPlugin.ID, "OK");
+ }
+
+ public List<Entity> getNames() {
+ List<Entity> names = new ArrayList<Entity>();
+ names.addAll(nameList);
+ return names;
+ }
+}
Propchange: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java?rev=1148511&r1=1148510&r2=1148511&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java (original)
+++ incubator/opennlp/sandbox/opennlp-caseditor-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java Tue Jul 19 20:19:31 2011
@@ -59,7 +59,7 @@ class NameFinderViewPage extends Page im
entityColumn.setWidth(135);
entityList.setLabelProvider(new EntityLabelProvider());
- entityList.setContentProvider(new EntityContentProvider(entityList));
+ entityList.setContentProvider(new EntityContentProvider(new NameFinderJob(), entityList));
getSite().setSelectionProvider(entityList);
entityList.setInput(editor.getDocument());