You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/12 12:18:00 UTC

svn commit: r1299628 - in /opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor: ./ sentdetect/

Author: joern
Date: Mon Mar 12 11:18:00 2012
New Revision: 1299628

URL: http://svn.apache.org/viewvc?rev=1299628&view=rev
Log:
OPENNLP-462 Added support to exclude annotation types from intersecting with recommended sentences. Existing sentences are now handled via the new exclude logic.

Modified:
    opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
    opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java
    opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java
    opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java

Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/OpenNLPPreferenceConstants.java Mon Mar 12 11:18:00 2012
@@ -26,6 +26,7 @@ public class OpenNLPPreferenceConstants 
   // Sentence detector
   public static final String PARAGRAPH_TYPE = OpenNLPPlugin.ID + ".PARAGRAPH_TYPE";
   public static final String SENTENCE_DETECTOR_MODEL_PATH = OpenNLPPlugin.ID + ".SENTENCE_DETECTOR_MODEL_PATH";
+  public static final String SENT_EXCLUSION_TYPE = OpenNLPPlugin.ID + ".SENT_EXCLUSION_TYPE";
 
   // Name Finder  
   public static final String ADDITIONAL_SENTENCE_TYPE = OpenNLPPlugin.ID + ".ADDITIONAL_SENTENCE_TYPE";

Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceContentProvider.java Mon Mar 12 11:18:00 2012
@@ -18,6 +18,7 @@
 package org.apache.opennlp.caseditor.sentdetect;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
@@ -116,7 +117,7 @@ public class SentenceContentProvider imp
               
               // Add a new potential sentence
               // Only add if it is not a confirmed sentence yet!
-              // for each anotation, search confirmed sentence array above ...
+              // for each annotation, search confirmed sentence array above ...
               for (PotentialAnnotation sentence : sentences) {
                 if (EntityContentProvider.searchEntity(confirmedSentences,
                     sentence.getBeginIndex(), sentence.getEndIndex(),
@@ -125,21 +126,28 @@ public class SentenceContentProvider imp
                 }
               }
               
+              // TODO: Try to reuse selection computation code
+              
               // is sentence detector view active ?!
               if (SentenceContentProvider.this.sentenceDetectorView.isActive()) {
                 int newSelectionIndex = -1;
                 
-                if (sentenceTable.getSelectionIndex() == -1 && sentenceTable.getItemCount() > 0) {
-                  newSelectionIndex = 0;
-                }
-                
-                if (selectionIndex < sentenceTable.getItemCount()) {
-                  newSelectionIndex = selectionIndex;
+                if (sentenceTable.getItemCount() > 0) {
+                  if (sentenceTable.getSelectionIndex() == -1) {
+                    newSelectionIndex = 0;
+                  }
+                  
+                  if (selectionIndex < sentenceTable.getItemCount()) {
+                    newSelectionIndex = selectionIndex;
+                  }
+                  else if (selectionIndex >= sentenceTable.getItemCount()) {
+                    newSelectionIndex = sentenceTable.getItemCount() - 1;
+                  }
                 }
                 
                 if (newSelectionIndex != -1) {
                   SentenceContentProvider.this.sentenceList.setSelection(
-                      new StructuredSelection(SentenceContentProvider.this.sentenceList.getElementAt(selectionIndex)));
+                      new StructuredSelection(SentenceContentProvider.this.sentenceList.getElementAt(newSelectionIndex)));
                 }
               }
             }
@@ -204,11 +212,8 @@ public class SentenceContentProvider imp
       }
     }
     
-    String modelPath = store.getString(OpenNLPPreferenceConstants.SENTENCE_DETECTOR_MODEL_PATH);
-    
-    sentenceDetector.setModelPath(modelPath);
     sentenceDetector.setParagraphs(paragraphSpans);
-    sentenceDetector.setText(editor.getDocument().getCAS().getDocumentText());
+
     
     String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);
     
@@ -216,8 +221,9 @@ public class SentenceContentProvider imp
       sentenceDetectorView.setMessage("Sentence type name is not set!");
       return;
     }
-      
+    
     Type sentenceType = cas.getTypeSystem().getType(sentenceTypeName);
+    // TODO: Add all existing sentences to the exclusion spans ...
     
     if (sentenceType == null) {
       sentenceDetectorView.setMessage("Type system does not contain sentence type!");
@@ -226,6 +232,33 @@ public class SentenceContentProvider imp
     
     sentenceDetector.setSentenceType(sentenceType.getName());
     
+    String exclusionSpanTypeNames = store.getString(OpenNLPPreferenceConstants.SENT_EXCLUSION_TYPE);
+    
+    Type exclusionSpanTypes[] = UIMAUtil.splitTypes(exclusionSpanTypeNames, ',', cas.getTypeSystem());
+    // Always exclude existing sentences; binarySearch needs a sorted array and a miss is not always -1
+    if (!Arrays.asList(exclusionSpanTypes).contains(sentenceType)) {
+      exclusionSpanTypes = Arrays.copyOf(exclusionSpanTypes, exclusionSpanTypes.length + 1);
+      exclusionSpanTypes[exclusionSpanTypes.length - 1] = sentenceType;
+    }
+    
+    List<Span> exclusionSpans = new ArrayList<Span>();
+    
+    if (exclusionSpanTypes != null) {
+      for (Iterator<AnnotationFS> exclusionAnnIterator = UIMAUtil.createMultiTypeIterator(cas, exclusionSpanTypes);
+          exclusionAnnIterator.hasNext();) {
+        
+        AnnotationFS exclusionAnnotation = exclusionAnnIterator.next();
+        exclusionSpans.add(new Span(exclusionAnnotation.getBegin(), exclusionAnnotation.getEnd()));
+      }
+    }
+    
+    sentenceDetector.setExclusionSpans(exclusionSpans);
+    
+    String modelPath = store.getString(OpenNLPPreferenceConstants.SENTENCE_DETECTOR_MODEL_PATH);
+    sentenceDetector.setModelPath(modelPath);
+    
+    sentenceDetector.setText(editor.getDocument().getCAS().getDocumentText());
+    
     sentenceDetector.schedule();
   }
   

Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorJob.java Mon Mar 12 11:18:00 2012
@@ -20,6 +20,7 @@ package org.apache.opennlp.caseditor.sen
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import opennlp.tools.sentdetect.SentenceDetectorME;
@@ -47,6 +48,8 @@ public class SentenceDetectorJob extends
   private List<Span> paragraphs;
   
   private List<PotentialAnnotation> detectedSentences;
+
+  private List<Span> exclusionSpans;
   
   public SentenceDetectorJob() {
     super("Sentence Detector Job");
@@ -68,6 +71,13 @@ public class SentenceDetectorJob extends
     this.paragraphs =  paragraphs;
   }
   
+  public void setExclusionSpans(List<Span> exclusionSpans) {
+    // Keep a private copy, sorted by the natural ordering of Span; the passed-in list is not modified
+    this.exclusionSpans = new ArrayList<Span>();
+    this.exclusionSpans.addAll(exclusionSpans);
+    Collections.sort(this.exclusionSpans);
+  }
+  
   @Override
   protected IStatus run(IProgressMonitor monitor) {
     
@@ -93,17 +103,42 @@ public class SentenceDetectorJob extends
     
     detectedSentences = new ArrayList<PotentialAnnotation>();
     for (Span para : paragraphs) {
-    
-      Span sentenceSpans[] = sentenceDetector.sentPosDetect(para.getCoveredText(text).toString());
+
+      List<Span> textBlocks = new ArrayList<Span>();
+      // Paragraph and exclusion spans are both document offsets, so begin at the paragraph start
+      int textBlockBeginIndex = para.getStart();
+      for (Span exclusionSpan : exclusionSpans) {
+        // Skip exclusion spans which do not intersect the remaining part of this paragraph
+        if (exclusionSpan.getEnd() <= textBlockBeginIndex || exclusionSpan.getStart() >= para.getEnd()) {
+          continue;
+        }
+        
+        // TODO: Filter out whitespace sentences ...
+        if (exclusionSpan.getStart() > textBlockBeginIndex) {
+          textBlocks.add(new Span(textBlockBeginIndex, exclusionSpan.getStart()));
+        }
+        
+        textBlockBeginIndex = exclusionSpan.getEnd();
+      }
       
-      double confidence[] = sentenceDetector.getSentenceProbabilities();
+      if (textBlockBeginIndex < para.getEnd()) {
+        textBlocks.add(new Span(textBlockBeginIndex, para.getEnd()));
+      }
       
-      for (int i = 0; i < sentenceSpans.length; i++) {
-        Span sentenceSpan = sentenceSpans[i];
-        String sentenceText = text.substring(para.getStart() + sentenceSpan.getStart(), para.getStart() + sentenceSpan.getEnd());
-        detectedSentences.add(new PotentialAnnotation(para.getStart() + sentenceSpan.getStart(), 
-            para.getStart() + sentenceSpan.getEnd(), sentenceText,
-            confidence[i], sentenceType));
+      for (Span textBlock : textBlocks) {
+        Span sentenceSpans[] = sentenceDetector.sentPosDetect(
+            textBlock.getCoveredText(text).toString());
+        
+        double confidence[] = sentenceDetector.getSentenceProbabilities();
+        
+        for (int i = 0; i < sentenceSpans.length; i++) {
+          Span sentenceSpan = sentenceSpans[i];
+          String sentenceText = text.substring(textBlock.getStart() + sentenceSpan.getStart(),
+              textBlock.getStart() + sentenceSpan.getEnd());
+          detectedSentences.add(new PotentialAnnotation(textBlock.getStart() + sentenceSpan.getStart(), 
+              textBlock.getStart() + sentenceSpan.getEnd(), sentenceText,
+              confidence[i], sentenceType));
+        }
       }
     }
     

Modified: opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java?rev=1299628&r1=1299627&r2=1299628&view=diff
==============================================================================
--- opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java (original)
+++ opennlp/sandbox/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/sentdetect/SentenceDetectorPreferencePage.java Mon Mar 12 11:18:00 2012
@@ -48,7 +48,12 @@ public class SentenceDetectorPreferenceP
     
     TypeListFieldEditor paragraphType = new TypeListFieldEditor(
         OpenNLPPreferenceConstants.PARAGRAPH_TYPE,
-        "Paragraph Type", ts, getFieldEditorParent());
+        "Paragraph Types", ts, getFieldEditorParent());
     addField(paragraphType);
+    
+    TypeListFieldEditor exclusionType = new TypeListFieldEditor(
+        OpenNLPPreferenceConstants.SENT_EXCLUSION_TYPE,
+        "Exclusion Types", ts, getFieldEditorParent());
+    addField(exclusionType);
   }
 }