You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/10/14 11:10:25 UTC
svn commit: r1183249 - in /incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor: ./ namefinder/
Author: joern
Date: Fri Oct 14 09:10:25 2011
New Revision: 1183249
URL: http://svn.apache.org/viewvc?rev=1183249&view=rev
Log:
OPENNLP-310 Added an option to enable / disable confirmed name recall boosting
Added:
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java (with props)
Modified:
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java Fri Oct 14 09:10:25 2011
@@ -19,16 +19,22 @@ package org.apache.opennlp.caseditor;
public class OpenNLPPreferenceConstants {
- public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
- public static final String NAME_FINDER_MODEL_PATH = OpenNLPPlugin.ID + ".NAME_FINDER_MODEL_PATH";
+ // General settings
+ public static final String SENTENCE_TYPE = OpenNLPPlugin.ID + ".SENTENCE_TYPE";
+ public static final String TOKEN_TYPE = OpenNLPPlugin.ID + ".TOKEN_TYPE";
+ // Sentence detector
public static final String PARAGRAPH_TYPE = OpenNLPPlugin.ID + ".PARAGRAPH_TYPE";
- public static final String SENTENCE_TYPE = OpenNLPPlugin.ID + ".SENTENCE_TYPE";
+ public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
+
+ // Name Finder
public static final String ADDITIONAL_SENTENCE_TYPE = OpenNLPPlugin.ID + ".ADDITIONAL_SENTENCE_TYPE";
- public static final String TOKEN_TYPE = OpenNLPPlugin.ID + ".TOKEN_TYPE";
public static final String NAME_TYPE = OpenNLPPlugin.ID + ".NAME_TYPE";
+ public static final String NAME_FINDER_MODEL_PATH = OpenNLPPlugin.ID + ".NAME_FINDER_MODEL_PATH";
+ public static final String ENABLE_CONFIRMED_NAME_DETECTION = OpenNLPPlugin.ID + ".ENABLE_RECALL_BOOSTING";
+
+ // Tokenizer
public static final String TOKENIZER_MODEL_PATH = OpenNLPPlugin.ID + ".TOKENIZER_MODEL_PATH";
-
public static final String TOKENIZER_ALGO_STATISTICAL = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_STATISTICAL";
public static final String TOKENIZER_ALGO_WHITESPACE = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_WHITESPACE";
public static final String TOKENIZER_ALGO_SIMPLE = OpenNLPPlugin.ID + ".TOKENIZER_ALGO_SIMPLE";
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java Fri Oct 14 09:10:25 2011
@@ -232,6 +232,83 @@ public class EntityContentProvider imple
}
}
+ private class NameFinderJobListener extends JobChangeAdapter {
+ public void done(final IJobChangeEvent event) {
+
+ Display.getDefault().asyncExec(new Runnable() {
+
+ @Override
+ public void run() {
+
+ // TODO: Check if view is still available, that might be called after view is disposed.
+
+ IStatus status = event.getResult();
+
+ if (status.isOK()) {
+ EntityContentProvider.this.nameFinderView.setMessage(null);
+
+ List<Entity> detectedEntities = EntityContentProvider.this.nameFinder.getNames();
+
+ // Remove all detected entities from the last run which are not detected anymore
+ for (Iterator<Entity> it = candidateEntities.iterator(); it.hasNext();) {
+ Entity entity = it.next();
+ if (searchEntity(detectedEntities, entity.getBeginIndex(),
+ entity.getEndIndex(), entity.getType()) == null) {
+
+ // TODO: Create an array of entities that should be removed, much faster ...
+ EntityContentProvider.this.entityListViewer.remove(entity);
+
+ // Can safely be removed, since it can only be an un-confirmed entity
+ it.remove();
+ }
+ }
+
+ // Update if entity already exist, or add it
+ for (Entity detectedEntity : detectedEntities) {
+
+ // Bug:
+ // There can be multiple entities in this span!
+ // In this case we want to keep the first, update it, and discard the others!
+
+ // Case: One entity spanning two tokens replaces
+
+ Entity entity = searchEntity(candidateEntities, detectedEntity.getBeginIndex(),
+ detectedEntity.getEndIndex(), detectedEntity.getType());
+
+ // A confirmed entity already exists, update its confidence score
+ if (entity != null) {
+ if (entity.isConfirmed()) {
+ entity.setConfidence(detectedEntity.getConfidence());
+ EntityContentProvider.this.entityListViewer.refresh(entity);
+ continue;
+ }
+ else {
+ entity.setBeginIndex(detectedEntity.getBeginIndex());
+ entity.setEndIndex(detectedEntity.getEndIndex());
+ entity.setEntityText(detectedEntity.getEntityText());
+ entity.setConfidence(detectedEntity.getConfidence());
+
+ EntityContentProvider.this.entityListViewer.refresh(entity);
+ }
+ }
+ else {
+ // Only add if it is not a confirmed entity!
+ if (searchEntity(confirmedEntities, detectedEntity.getBeginIndex(),
+ detectedEntity.getEndIndex(), detectedEntity.getType()) == null) {
+ EntityContentProvider.this.entityListViewer.add(detectedEntity);
+ candidateEntities.add(detectedEntity);
+ }
+ }
+ }
+ }
+ else {
+ EntityContentProvider.this.nameFinderView.setMessage(status.getMessage());
+ }
+ }
+ });
+ };
+ }
+
private NameFinderJob nameFinder;
private CasChangeNameFinderTrigger casChangeTrigger = new CasChangeNameFinderTrigger();
@@ -341,82 +418,7 @@ public class EntityContentProvider imple
}
}
- nameFinder.addJobChangeListener(new JobChangeAdapter() {
- public void done(final IJobChangeEvent event) {
-
- Display.getDefault().asyncExec(new Runnable() {
-
- @Override
- public void run() {
-
- // TODO: Check if view is still available, that might be called after view is disposed.
-
- IStatus status = event.getResult();
-
- if (status.isOK()) {
- EntityContentProvider.this.nameFinderView.setMessage(null);
-
- List<Entity> detectedEntities = EntityContentProvider.this.nameFinder.getNames();
-
- // Remove all detected entities from the last run which are not detected anymore
- for (Iterator<Entity> it = candidateEntities.iterator(); it.hasNext();) {
- Entity entity = it.next();
- if (searchEntity(detectedEntities, entity.getBeginIndex(),
- entity.getEndIndex(), entity.getType()) == null) {
-
- // TODO: Create an array of entities that should be removed, much faster ...
- EntityContentProvider.this.entityListViewer.remove(entity);
-
- // Can safely be removed, since it can only be an un-confirmed entity
- it.remove();
- }
- }
-
- // Update if entity already exist, or add it
- for (Entity detectedEntity : detectedEntities) {
-
- // Bug:
- // There can be multiple entities in this span!
- // In this case we want to keep the first, update it, and discard the others!
-
- // Case: One entity spanning two tokens replaces
-
- Entity entity = searchEntity(candidateEntities, detectedEntity.getBeginIndex(),
- detectedEntity.getEndIndex(), detectedEntity.getType());
-
- // A confirmed entity already exists, update its confidence score
- if (entity != null) {
- if (entity.isConfirmed()) {
- entity.setConfidence(detectedEntity.getConfidence());
- EntityContentProvider.this.entityListViewer.refresh(entity);
- continue;
- }
- else {
- entity.setBeginIndex(detectedEntity.getBeginIndex());
- entity.setEndIndex(detectedEntity.getEndIndex());
- entity.setEntityText(detectedEntity.getEntityText());
- entity.setConfidence(detectedEntity.getConfidence());
-
- EntityContentProvider.this.entityListViewer.refresh(entity);
- }
- }
- else {
- // Only add if it is not a confirmed entity!
- if (searchEntity(confirmedEntities, detectedEntity.getBeginIndex(),
- detectedEntity.getEndIndex(), detectedEntity.getType()) == null) {
- EntityContentProvider.this.entityListViewer.add(detectedEntity);
- candidateEntities.add(detectedEntity);
- }
- }
- }
- }
- else {
- EntityContentProvider.this.nameFinderView.setMessage(status.getMessage());
- }
- }
- });
- };
- });
+ nameFinder.addJobChangeListener(new NameFinderJobListener());
String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
@@ -540,13 +542,22 @@ public class EntityContentProvider imple
}
nameFinder.setTokens(tokens.toArray(new Span[tokens.size()]));
- nameFinder.setVerifiedNames(nameSpans.toArray(new Span[nameSpans.size()]));
nameFinder.setModelPath(modelPathes, nameTypeNames);
if (!nameFinder.isSystem()) {
nameFinder.setSystem(true);
}
+ boolean isRecallBoostingEnabled =
+ store.getBoolean(OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION);
+
+ if (isRecallBoostingEnabled) {
+ nameFinder.setVerifiedNames(nameSpans.toArray(new Span[nameSpans.size()]));
+ }
+ else {
+ nameFinder.setVerifiedNames(null);
+ }
+
nameFinder.schedule();
}
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java Fri Oct 14 09:10:25 2011
@@ -131,44 +131,42 @@ public class NameFinderJob extends Job {
// Note: This is slow!
// iterate over names, to find token indexes
-
- // TODO: This must work with multiple types ...
- for (Span verifiedName : verifiedNames) {
- boolean isStart = true;
-
- for (int i = 0; i < sentenceTokens.size(); i++) {
- if (verifiedName.contains(sentenceTokens.get(i))) {
-
- String outcome;
-
- // Need better mechanism here, first token in entity should be start!
- if (isStart) {
- outcome = NameFinderME.START;
- isStart = false;
- }
- else {
- outcome = NameFinderME.CONTINUE;
- }
-
-
- // TODO: Overlapping names are dangerous here!
-
- // TODO: We could use type information here ...
- // as part of the outcome!
- verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);
-
- // TODO: Do not put stop word
- // Only put, if char length is two
- // Only put only letters in token
- StringPattern pattern = StringPattern.recognize(tokenStrings[i]);
-
- if (pattern.isAllLetter() && tokenStrings[i].length() > 1) {
- nameTokens.add(verifiedName.getType() + "-" + tokenStrings[i]);
+ if (verifiedNames != null) {
+ for (Span verifiedName : verifiedNames) {
+ boolean isStart = true;
+
+ for (int i = 0; i < sentenceTokens.size(); i++) {
+ if (verifiedName.contains(sentenceTokens.get(i))) {
+
+ String outcome;
+
+ // Need better mechanism here, first token in entity should be start!
+ if (isStart) {
+ outcome = NameFinderME.START;
+ isStart = false;
+ }
+ else {
+ outcome = NameFinderME.CONTINUE;
+ }
+
+ // TODO: Overlapping names are dangerous here!
+
+ // TODO: We could use type information here ...
+ // as part of the outcome!
+ verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);
+
+ // TODO: Do not put stop word
+ // Only put, if char length is two
+ // Only put only letters in token
+ StringPattern pattern = StringPattern.recognize(tokenStrings[i]);
+
+ if (pattern.isAllLetter() && tokenStrings[i].length() > 1) {
+ nameTokens.add(verifiedName.getType() + "-" + tokenStrings[i]);
+ }
}
}
}
}
-
nameFinder.setRestriction(verifiedNameTokens);
nameFinder.setNameOnlyTokens(nameTokens);
Added: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java?rev=1183249&view=auto
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java (added)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java Fri Oct 14 09:10:25 2011
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.caseditor.namefinder;
+
+import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.eclipse.jface.preference.IPreferenceStore;
+
+public class NameFinderPreferenceInitializer {
+
+ static void initializeDefaultPreferences(IPreferenceStore store) {
+ store.setDefault(OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION, true);
+ }
+}
Propchange: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferenceInitializer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderPreferencePage.java Fri Oct 14 09:10:25 2011
@@ -17,8 +17,8 @@
package org.apache.opennlp.caseditor.namefinder;
-import org.apache.opennlp.caseditor.OpenNLPPlugin;
import org.apache.opennlp.caseditor.OpenNLPPreferenceConstants;
+import org.eclipse.jface.preference.BooleanFieldEditor;
import org.eclipse.jface.preference.FieldEditorPreferencePage;
import org.eclipse.jface.preference.StringFieldEditor;
import org.eclipse.ui.IWorkbench;
@@ -28,7 +28,6 @@ public class NameFinderPreferencePage ex
implements IWorkbenchPreferencePage {
public NameFinderPreferencePage() {
-// setPreferenceStore();
setDescription("Name Finder Preferences.");
}
@@ -52,5 +51,10 @@ public class NameFinderPreferencePage ex
OpenNLPPreferenceConstants.NAME_TYPE,
"Name Type(s)", getFieldEditorParent());
addField(nameType);
+
+ BooleanFieldEditor enableRecallBoosting = new BooleanFieldEditor(
+ OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION,
+ "Enable the detection of confirmed names", getFieldEditorParent());
+ addField(enableRecallBoosting);
}
}
Modified: incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java?rev=1183249&r1=1183248&r2=1183249&view=diff
==============================================================================
--- incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java (original)
+++ incubator/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java Fri Oct 14 09:10:25 2011
@@ -27,6 +27,7 @@ import org.apache.uima.caseditor.editor.
import org.apache.uima.caseditor.editor.util.AnnotationSelection;
import org.eclipse.jface.action.IAction;
import org.eclipse.jface.action.IToolBarManager;
+import org.eclipse.jface.preference.IPreferenceStore;
import org.eclipse.jface.viewers.ISelection;
import org.eclipse.jface.viewers.ISelectionChangedListener;
import org.eclipse.jface.viewers.SelectionChangedEvent;
@@ -65,6 +66,9 @@ class NameFinderViewPage extends Page im
NameFinderViewPage(ICasEditor editor, ICasDocument document) {
this.editor = editor;
+
+ IPreferenceStore store = editor.getCasDocumentProvider().getTypeSystemPreferenceStore(editor.getEditorInput());
+ NameFinderPreferenceInitializer.initializeDefaultPreferences(store);
}
public void createControl(Composite parent) {