You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/25 23:01:51 UTC
svn commit: r1175575 -
/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/
Author: joern
Date: Sun Sep 25 21:01:50 2011
New Revision: 1175575
URL: http://svn.apache.org/viewvc?rev=1175575&view=rev
Log:
OPENNLP-235 First support for multiple models.
Modified:
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfidenceSpan.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/Entity.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/MultiModelNameFinder.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfidenceSpan.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfidenceSpan.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfidenceSpan.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfidenceSpan.java Sun Sep 25 21:01:50 2011
@@ -25,11 +25,15 @@ public class ConfidenceSpan extends Span
private final double confidence;
- public ConfidenceSpan(int s, int e, double confidence) {
- super(s, e);
+ public ConfidenceSpan(int s, int e, double confidence, String type) {
+ super(s, e, type);
this.confidence = confidence;
}
+ public ConfidenceSpan(int s, int e, double confidence) {
+ this(s, e, confidence, null);
+ }
+
double getConfidence() {
return confidence;
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/Entity.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/Entity.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/Entity.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/Entity.java Sun Sep 25 21:01:50 2011
@@ -24,19 +24,22 @@ import org.eclipse.core.runtime.IAdaptab
// TODO: Rename to PotentialAnnotation, should also contain a type, then we can
// reuse the code to create an annotation for it.
public class Entity implements IAdaptable {
-
+
private int beginIndex;
private int endIndex;
-
+
private String entityText;
-
+
private boolean isConfirmed;
-
+
private Double confidence;
+ private String type;
+
private AnnotationFS linkedAnnotationFS;
- public Entity(int beginIndex, int endIndex, String entityText, Double confidence, boolean isConfirmed) {
+ public Entity(int beginIndex, int endIndex, String entityText, Double confidence,
+ boolean isConfirmed, String type) {
this.beginIndex = beginIndex;
this.endIndex = endIndex;
@@ -45,6 +48,12 @@ public class Entity implements IAdaptabl
this.confidence = confidence;
this.isConfirmed = isConfirmed;
+
+ this.type = type;
+ }
+
+ public Entity(int beginIndex, int endIndex, String entityText, Double confidence, boolean isConfirmed) {
+ this(beginIndex, endIndex, entityText, confidence, isConfirmed, null);
}
public void setBeginIndex(int beginIndex) {
@@ -71,6 +80,10 @@ public class Entity implements IAdaptabl
return entityText;
}
+ public String getType() {
+ return type;
+ }
+
public void setConfidence(Double confidence) {
this.confidence = confidence;
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Sun Sep 25 21:01:50 2011
@@ -18,6 +18,7 @@
package org.apache.opennlp.caseditor.namefinder;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
@@ -95,12 +96,23 @@ public class EntityContentProvider imple
}
+ private static boolean contains(String array[], String element) {
+
+ for (String arrayElement : array) {
+ if (element.equals(arrayElement))
+ return true;
+ }
+
+ return false;
+ }
+
class ConfirmedEntityListener implements ICasDocumentListener {
+
@Override
public void added(FeatureStructure fs) {
- if (fs instanceof AnnotationFS && fs.getType().getName().equals(nameTypeName)) {
+ if (fs instanceof AnnotationFS && contains(nameTypeNames, fs.getType().getName())) {
// TODO: Check that type matches ...
AnnotationFS annotation = (AnnotationFS) fs;
@@ -130,7 +142,7 @@ public class EntityContentProvider imple
}
else {
Entity newEntity = new Entity(annotation.getBegin(), annotation.getEnd(),
- annotation.getCoveredText(), null, true);
+ annotation.getCoveredText(), null, true, annotation.getType().getName());
EntityContentProvider.this.entityListViewer.add(newEntity);
EntityContentProvider.this.knownEntities.add(newEntity);
@@ -153,7 +165,7 @@ public class EntityContentProvider imple
@Override
public void removed(FeatureStructure fs) {
- if (fs instanceof AnnotationFS && fs.getType().getName().equals(nameTypeName)) {
+ if (fs instanceof AnnotationFS && contains(nameTypeNames, fs.getType().getName())) {
AnnotationFS annotation = (AnnotationFS) fs;
Entity confirmedEntity = searchEntity(EntityContentProvider.this.knownEntities,
@@ -201,17 +213,20 @@ public class EntityContentProvider imple
// contains all existing entity annotations and is synchronized!
// needed by name finder to calculate updates ...
-// private List<Entity> confirmedEntities = new ArrayList<Entity>();
private List<Entity> knownEntities = new ArrayList<Entity>();
- private String nameTypeName;
+ private String nameTypeNames[];
EntityContentProvider(NameFinderJob nameFinder, TableViewer entityList) {
this.nameFinder = nameFinder;
this.entityListViewer = entityList;
IPreferenceStore store = OpenNLPPlugin.getDefault().getPreferenceStore();
- nameTypeName = store.getString(OpenNLPPreferenceConstants.NAME_TYPE);
+ nameTypeNames = store.getString(OpenNLPPreferenceConstants.NAME_TYPE).split(",");
+
+ for (int i = 0; i < nameTypeNames.length; i++) {
+ nameTypeNames[i] = nameTypeNames[i].trim();
+ }
nameFinder.addJobChangeListener(new JobChangeAdapter() {
public void done(final IJobChangeEvent event) {
@@ -294,7 +309,6 @@ public class EntityContentProvider imple
if (newInput != null) {
IPreferenceStore store = OpenNLPPlugin.getDefault().getPreferenceStore();
- String nameTypeName = store.getString(OpenNLPPreferenceConstants.NAME_TYPE);
input = (ICasDocument) newInput;
@@ -303,20 +317,25 @@ public class EntityContentProvider imple
input.addChangeListener(nameFinderTrigger);
// Create initial list of confirmed entities ...
- Type nameType = input.getCAS().getTypeSystem().getType(nameTypeName);
-
- FSIndex<AnnotationFS> nameAnnotations = input.getCAS()
- .getAnnotationIndex(nameType);
- for (Iterator<AnnotationFS> nameIterator = nameAnnotations
- .iterator(); nameIterator.hasNext();) {
+ for (String nameTypeName : nameTypeNames) {
+ Type nameType = input.getCAS().getTypeSystem().getType(nameTypeName);
- AnnotationFS nameAnnotation = (AnnotationFS) nameIterator.next();
+ FSIndex<AnnotationFS> nameAnnotations = input.getCAS()
+ .getAnnotationIndex(nameType);
- Entity entity = new Entity(nameAnnotation.getBegin(),
- nameAnnotation.getEnd(), nameAnnotation.getCoveredText(), null, true);
- entity.setLinkedAnnotation(nameAnnotation);
- knownEntities.add(entity);
+ for (Iterator<AnnotationFS> nameIterator = nameAnnotations
+ .iterator(); nameIterator.hasNext();) {
+
+ AnnotationFS nameAnnotation = (AnnotationFS) nameIterator.next();
+
+ // TODO: Entity must have a type ...
+ Entity entity = new Entity(nameAnnotation.getBegin(),
+ nameAnnotation.getEnd(), nameAnnotation.getCoveredText(), null, true,
+ nameAnnotation.getType().getName());
+ entity.setLinkedAnnotation(nameAnnotation);
+ knownEntities.add(entity);
+ }
}
runNameFinder();
@@ -327,22 +346,25 @@ public class EntityContentProvider imple
IPreferenceStore store = OpenNLPPlugin.getDefault().getPreferenceStore();
String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
String additionalSentenceTypes = store.getString(OpenNLPPreferenceConstants.ADDITIONAL_SENTENCE_TYPE);
- String nameTypeName = store.getString(OpenNLPPreferenceConstants.NAME_TYPE);
- String modelPath = store.getString(OpenNLPPreferenceConstants.NAME_FINDER_MODEL_PATH);
+
+ String modelPathes[] = store.getString(OpenNLPPreferenceConstants.NAME_FINDER_MODEL_PATH).split(",");
+
+ for (int i = 0; i < modelPathes.length; i++) {
+ modelPathes[i] = modelPathes[i].trim();
+ }
CAS cas = input.getCAS();
// just get it from preference store?!
// Should have a good way to display an error when the type is incorrect ...
-
String text = cas.getDocumentText();
if (text != null) {
List<Span> sentences = new ArrayList<Span>();
- String sentenceTypeNames[] = (sentenceTypeName + "," + additionalSentenceTypes) .split(",");
+ String sentenceTypeNames[] = (sentenceTypeName + "," + additionalSentenceTypes).split(",");
for (String typeName : sentenceTypeNames) {
Type sentenceType = cas.getTypeSystem().getType(typeName.trim());
@@ -359,7 +381,6 @@ public class EntityContentProvider imple
.next();
sentences.add(new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd()));
-
}
}
@@ -384,20 +405,23 @@ public class EntityContentProvider imple
}
}
-
List<Span> nameSpans = new ArrayList<Span>();
- Type nameType = cas.getTypeSystem().getType(nameTypeName);
-
- FSIndex<AnnotationFS> nameAnnotations = cas
- .getAnnotationIndex(nameType);
-
- for (Iterator<AnnotationFS> nameIterator = nameAnnotations
- .iterator(); nameIterator.hasNext();) {
-
- AnnotationFS nameAnnotation = (AnnotationFS) nameIterator.next();
-
- nameSpans.add(new Span(nameAnnotation.getBegin(), nameAnnotation.getEnd()));
+ for (String nameTypeName : nameTypeNames) {
+
+ Type nameType = cas.getTypeSystem().getType(nameTypeName);
+
+ FSIndex<AnnotationFS> nameAnnotations = cas
+ .getAnnotationIndex(nameType);
+
+ for (Iterator<AnnotationFS> nameIterator = nameAnnotations
+ .iterator(); nameIterator.hasNext();) {
+
+ AnnotationFS nameAnnotation = (AnnotationFS) nameIterator.next();
+
+ nameSpans.add(new Span(nameAnnotation.getBegin(), nameAnnotation.getEnd(),
+ nameAnnotation.getType().getName()));
+ }
}
// This will cause issues when it is done while it is running!
@@ -405,7 +429,7 @@ public class EntityContentProvider imple
nameFinder.setSentences(sentences.toArray(new Span[sentences.size()]));
nameFinder.setTokens(tokens.toArray(new Span[tokens.size()]));
nameFinder.setVerifiedNames(nameSpans.toArray(new Span[nameSpans.size()]));
- nameFinder.setModelPath(modelPath);
+ nameFinder.setModelPath(modelPathes, nameTypeNames);
nameFinder.schedule();
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/MultiModelNameFinder.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/MultiModelNameFinder.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/MultiModelNameFinder.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/MultiModelNameFinder.java Sun Sep 25 21:01:50 2011
@@ -71,11 +71,15 @@ public class MultiModelNameFinder implem
}
}
+
private NameFinderME nameFinders[];
+ private String typeNames[]; // rename to modelTypes
private RestrictedSequencesValidator sequenceValidator;
- MultiModelNameFinder(String... modelPathes) {
+ MultiModelNameFinder(String modelPathes[], String typeNames[]) {
+
+ this.typeNames = typeNames;
nameFinders = new NameFinderME[modelPathes.length];
@@ -87,8 +91,9 @@ public class MultiModelNameFinder implem
try {
TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
- sequenceValidator = new RestrictedSequencesValidator();
- nameFinders[i] = new NameFinderME(model, null, 5, sequenceValidator);
+// sequenceValidator = new RestrictedSequencesValidator();
+// nameFinders[i] = new NameFinderME(model, null, 5, sequenceValidator);
+ nameFinders[i] = new NameFinderME(model, null, 5);
} catch (IOException e) {
e.printStackTrace();
} finally {
@@ -103,16 +108,16 @@ public class MultiModelNameFinder implem
}
+ // Needs to be changed, so different models are supported
void setRestriction(Map<Integer, String> nameIndex) {
- sequenceValidator.setRestriction(nameIndex);
+// sequenceValidator.setRestriction(nameIndex);
}
// Needs to be changed, so it can be about different categories ...
void setNameOnlyTokens(Set<String> nameOnlyTokens) {
- sequenceValidator.setNameOnlyTokens(nameOnlyTokens);
+// sequenceValidator.setNameOnlyTokens(nameOnlyTokens);
}
-
@Override
public void clearAdaptiveData() {
for (NameFinderME nameFinder : nameFinders) {
@@ -125,14 +130,15 @@ public class MultiModelNameFinder implem
List<ConfidenceSpan> names = new ArrayList<ConfidenceSpan>();
- for (NameFinderME nameFinder : nameFinders) {
+ for (int i = 0; i < nameFinders.length; i++) {
+ NameFinderME nameFinder = nameFinders[i];
Span detectedNames[] = nameFinder.find(sentence);
double confidence[] = nameFinder.probs();
- for (int i = 0; i < detectedNames.length; i++) {
+ for (int j = 0; j < detectedNames.length; j++) {
// TODO: Also add type ...
- names.add(new ConfidenceSpan(detectedNames[i].getStart(), detectedNames[i].getEnd(),
- confidence[i]));
+ names.add(new ConfidenceSpan(detectedNames[j].getStart(), detectedNames[j].getEnd(),
+ confidence[j], typeNames[i]));
}
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Sun Sep 25 21:01:50 2011
@@ -17,8 +17,6 @@
package org.apache.opennlp.caseditor.namefinder;
-import java.io.IOException;
-import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -27,12 +25,9 @@ import java.util.Map;
import java.util.Set;
import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.NameFinderSequenceValidator;
-import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
import opennlp.tools.util.featuregen.StringPattern;
-import org.apache.opennlp.caseditor.ModelUtil;
import org.apache.opennlp.caseditor.OpenNLPPlugin;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.IStatus;
@@ -46,9 +41,11 @@ public class NameFinderJob extends Job {
private MultiModelNameFinder nameFinder;
-// private RestrictedSequencesValidator sequenceValidator;
- private String modelPath;
+ private String modelPath[];
+
+ private String modelTypes[];
+
private String text;
private Span sentences[];
private Span tokens[];
@@ -63,8 +60,9 @@ public class NameFinderJob extends Job {
/**
* @param modelPath
*/
- synchronized void setModelPath(String modelPath) {
- this.modelPath = modelPath;
+ synchronized void setModelPath(String modelPathes[], String modelTypes[]) {
+ this.modelPath = modelPathes;
+ this.modelTypes = modelTypes;
}
synchronized void setText(String text) {
@@ -90,26 +88,7 @@ public class NameFinderJob extends Job {
// lazy load model on first run ... how to lazy initialize multiple name finders?
if (nameFinder == null) {
-
- // load multiple name finders here
-
- InputStream modelIn = ModelUtil.openModelIn(modelPath);
-
-// try {
-// TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
-// sequenceValidator = new RestrictedSequencesValidator();
-// nameFinder = new NameFinderME(model, null, 5, sequenceValidator);
- nameFinder = new MultiModelNameFinder(modelPath);
-// } catch (IOException e) {
-// e.printStackTrace();
-// } finally {
-// if (modelIn != null) {
-// try {
-// modelIn.close();
-// } catch (IOException e) {
-// }
-// }
-// }
+ nameFinder = new MultiModelNameFinder(modelPath, modelTypes);
}
if (nameFinder != null) {
@@ -143,8 +122,8 @@ public class NameFinderJob extends Job {
// Note: This is slow!
// iterate over names, to find token indexes
-
+ // TODO: This must work with multiple types ...
for (Span verifiedName : verifiedNames) {
boolean isStart = true;
@@ -191,9 +170,8 @@ public class NameFinderJob extends Job {
String coveredText = text.substring(beginIndex, endIndex);
-
nameList.add(new Entity(beginIndex, endIndex, coveredText,
- names[i].getConfidence(), false));
+ names[i].getConfidence(), false, names[i].getType()));
}
}
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java Sun Sep 25 21:01:50 2011
@@ -38,19 +38,19 @@ public class NameFinderPreferencePage ex
@Override
protected void createFieldEditors() {
- StringFieldEditor modelPath = new StringFieldEditor(
- OpenNLPPreferenceConstants.NAME_FINDER_MODEL_PATH,
- "Model Path", getFieldEditorParent());
- addField(modelPath);
-
StringFieldEditor additionalSentenceTypes = new StringFieldEditor(
OpenNLPPreferenceConstants.ADDITIONAL_SENTENCE_TYPE,
"Additional Sentence Types", getFieldEditorParent());
addField(additionalSentenceTypes);
+ StringFieldEditor modelPath = new StringFieldEditor(
+ OpenNLPPreferenceConstants.NAME_FINDER_MODEL_PATH,
+ "Model Path(es)", getFieldEditorParent());
+ addField(modelPath);
+
StringFieldEditor nameType = new StringFieldEditor(
OpenNLPPreferenceConstants.NAME_TYPE,
- "Name Type", getFieldEditorParent());
+ "Name Type(s)", getFieldEditorParent());
addField(nameType);
}
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java?rev=1175575&r1=1175574&r2=1175575&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java Sun Sep 25 21:01:50 2011
@@ -145,7 +145,7 @@ class NameFinderViewPage extends Page im
// If that annotation exist, then match it.
// Bug: Need to check the type also ...
Entity entity = new Entity(firstAnnotation.getBegin(), firstAnnotation.getEnd(),
- firstAnnotation.getCoveredText(), null, true);
+ firstAnnotation.getCoveredText(), null, true, firstAnnotation.getType().getName());
ISelection tableSelection = new StructuredSelection(entity);
entityList.setSelection(tableSelection, true);