You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/12 12:18:00 UTC
svn commit: r1299628 - in /opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor: ./ sentdetect/
Author: joern
Date: Mon Mar 12 11:18:00 2012
New Revision: 1299628
URL: http://svn.apache.org/viewvc?rev=1299628&view=rev
Log:
OPENNLP-462 Added support to exclude annotation types from intersecting with recommended sentences. Existing sentences are now handled via the new exclude logic.
Modified:
opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java
opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java
opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java
Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java Mon Mar 12 11:18:00 2012
@@ -26,6 +26,7 @@ public class OpenNLPPreferenceConstants
// Sentence detector
public static final String PARAGRAPH_TYPE = OpenNLPPlugin.ID + ".PARAGRAPH_TYPE";
public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
+ public static final String SENT_EXCLUSION_TYPE = OpenNLPPlugin.ID + ".SENT_EXCLUSION_TYPE";
// Name Finder
public static final String ADDITIONAL_SENTENCE_TYPE = OpenNLPPlugin.ID + ".ADDITIONAL_SENTENCE_TYPE";
Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java Mon Mar 12 11:18:00 2012
@@ -18,6 +18,7 @@
package org.apache.opennlp.caseditor.sentdetect;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@@ -116,7 +117,7 @@ public class SentenceContentProvider imp
// Add a new potential sentence
// Only add if it is not a confirmed sentence yet!
- // for each anotation, search confirmed sentence array above ...
+ // for each annotation, search confirmed sentence array above ...
for (PotentialAnnotation sentence : sentences) {
if (EntityContentProvider.searchEntity(confirmedSentences,
sentence.getBeginIndex(), sentence.getEndIndex(),
@@ -125,21 +126,28 @@ public class SentenceContentProvider imp
}
}
+ // TODO: Try to reuse selection computation code
+
// is sentence detector view active ?!
if (SentenceContentProvider.this.sentenceDetectorView.isActive()) {
int newSelectionIndex = -1;
- if (sentenceTable.getSelectionIndex() == -1 && sentenceTable.getItemCount() > 0) {
- newSelectionIndex = 0;
- }
-
- if (selectionIndex < sentenceTable.getItemCount()) {
- newSelectionIndex = selectionIndex;
+ if (sentenceTable.getItemCount() > 0) {
+ if (sentenceTable.getSelectionIndex() == -1) {
+ newSelectionIndex = 0;
+ }
+
+ if (selectionIndex < sentenceTable.getItemCount()) {
+ newSelectionIndex = selectionIndex;
+ }
+ else if (selectionIndex >= sentenceTable.getItemCount()) {
+ newSelectionIndex = sentenceTable.getItemCount() - 1;
+ }
}
if (newSelectionIndex != -1) {
SentenceContentProvider.this.sentenceList.setSelection(
- new StructuredSelection(SentenceContentProvider.this.sentenceList.getElementAt(selectionIndex)));
+ new StructuredSelection(SentenceContentProvider.this.sentenceList.getElementAt(newSelectionIndex)));
}
}
}
@@ -204,11 +212,8 @@ public class SentenceContentProvider imp
}
}
- String modelPath = store.getString(OpenNLPPreferenceConstants.SENTENCE_DETECTOR_MODEL_PATH);
-
- sentenceDetector.setModelPath(modelPath);
sentenceDetector.setParagraphs(paragraphSpans);
- sentenceDetector.setText(editor.getDocument().getCAS().getDocumentText());
+
String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
@@ -216,8 +221,9 @@ public class SentenceContentProvider imp
sentenceDetectorView.setMessage("Sentence type name is not set!");
return;
}
-
+
Type sentenceType = cas.getTypeSystem().getType(sentenceTypeName);
+ // TODO: Add all existing sentences to the exclusion spans ...
if (sentenceType == null) {
sentenceDetectorView.setMessage("Type system does not contain sentence type!");
@@ -226,6 +232,33 @@ public class SentenceContentProvider imp
sentenceDetector.setSentenceType(sentenceType.getName());
+ String exclusionSpanTypeNames = store.getString(OpenNLPPreferenceConstants.SENT_EXCLUSION_TYPE);
+
+ Type exclusionSpanTypes[] = UIMAUtil.splitTypes(exclusionSpanTypeNames, ',', cas.getTypeSystem());
+
+ if (Arrays.binarySearch(exclusionSpanTypes, sentenceType) == -1) {
+ exclusionSpanTypes = Arrays.copyOf(exclusionSpanTypes, exclusionSpanTypes.length + 1);
+ exclusionSpanTypes[exclusionSpanTypes.length - 1] = sentenceType;
+ }
+
+ List<Span> exclusionSpans = new ArrayList<Span>();
+
+ if (exclusionSpanTypes != null) {
+ for (Iterator<AnnotationFS> exclusionAnnIterator = UIMAUtil.createMultiTypeIterator(cas, exclusionSpanTypes);
+ exclusionAnnIterator.hasNext();) {
+
+ AnnotationFS exclusionAnnotation = exclusionAnnIterator.next();
+ exclusionSpans.add(new Span(exclusionAnnotation.getBegin(), exclusionAnnotation.getEnd()));
+ }
+ }
+
+ sentenceDetector.setExclusionSpans(exclusionSpans);
+
+ String modelPath = store.getString(OpenNLPPreferenceConstants.SENTENCE_DETECTOR_MODEL_PATH);
+ sentenceDetector.setModelPath(modelPath);
+
+ sentenceDetector.setText(editor.getDocument().getCAS().getDocumentText());
+
sentenceDetector.schedule();
}
Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java Mon Mar 12 11:18:00 2012
@@ -20,6 +20,7 @@ package org.apache.opennlp.caseditor.sen
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import opennlp.tools.sentdetect.SentenceDetectorME;
@@ -47,6 +48,8 @@ public class SentenceDetectorJob extends
private List<Span> paragraphs;
private List<PotentialAnnotation> detectedSentences;
+
+ private List<Span> exclusionSpans;
public SentenceDetectorJob() {
super("Sentence Detector Job");
@@ -68,6 +71,13 @@ public class SentenceDetectorJob extends
this.paragraphs = paragraphs;
}
+ public void setExclusionSpans(List<Span> exclusionSpans) {
+
+ this.exclusionSpans = new ArrayList<Span>();
+ this.exclusionSpans.addAll(exclusionSpans);
+ Collections.sort(this.exclusionSpans);
+ }
+
@Override
protected IStatus run(IProgressMonitor monitor) {
@@ -93,17 +103,42 @@ public class SentenceDetectorJob extends
detectedSentences = new ArrayList<PotentialAnnotation>();
for (Span para : paragraphs) {
-
- Span sentenceSpans[] = sentenceDetector.sentPosDetect(para.getCoveredText(text).toString());
+
+ List<Span> textBlocks = new ArrayList<Span>();
+
+ int textBlockBeginIndex = 0;
+
+ for (Span exclusionSpan : exclusionSpans) {
+
+ Span textBlockSpan = new Span(textBlockBeginIndex, exclusionSpan.getStart());
+
+ // TODO: Filter out whitespace sentences ...
+
+ if (textBlockSpan.length() > 0) {
+ textBlocks.add(textBlockSpan);
+ }
+
+ textBlockBeginIndex = exclusionSpan.getEnd();
+ }
- double confidence[] = sentenceDetector.getSentenceProbabilities();
+ if (textBlockBeginIndex < para.getEnd() - para.getStart()) {
+ textBlocks.add(new Span(textBlockBeginIndex, para.getEnd()));
+ }
- for (int i = 0; i < sentenceSpans.length; i++) {
- Span sentenceSpan = sentenceSpans[i];
- String sentenceText = text.substring(para.getStart() + sentenceSpan.getStart(), para.getStart() + sentenceSpan.getEnd());
- detectedSentences.add(new PotentialAnnotation(para.getStart() + sentenceSpan.getStart(),
- para.getStart() + sentenceSpan.getEnd(), sentenceText,
- confidence[i], sentenceType));
+ for (Span textBlock : textBlocks) {
+ Span sentenceSpans[] = sentenceDetector.sentPosDetect(
+ textBlock.getCoveredText(text).toString());
+
+ double confidence[] = sentenceDetector.getSentenceProbabilities();
+
+ for (int i = 0; i < sentenceSpans.length; i++) {
+ Span sentenceSpan = sentenceSpans[i];
+ String sentenceText = text.substring(textBlock.getStart() + sentenceSpan.getStart(),
+ textBlock.getStart() + sentenceSpan.getEnd());
+ detectedSentences.add(new PotentialAnnotation(textBlock.getStart() + sentenceSpan.getStart(),
+ textBlock.getStart() + sentenceSpan.getEnd(), sentenceText,
+ confidence[i], sentenceType));
+ }
}
}
Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java Mon Mar 12 11:18:00 2012
@@ -48,7 +48,12 @@ public class SentenceDetectorPreferenceP
TypeListFieldEditor paragraphType = new TypeListFieldEditor(
OpenNLPPreferenceConstants.PARAGRAPH_TYPE,
- "Paragraph Type", ts, getFieldEditorParent());
+ "Paragraph Types", ts, getFieldEditorParent());
addField(paragraphType);
+
+ TypeListFieldEditor exclusionType = new TypeListFieldEditor(
+ OpenNLPPreferenceConstants.SENT_EXCLUSION_TYPE,
+ "Exclusion Types", ts, getFieldEditorParent());
+ addField(exclusionType);
}
}