You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/10/02 20:48:44 UTC

svn commit: r1810599 - in /ctakes/trunk: ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ ctakes-constituency-parser/src/main/java/org/apa...

Author: seanfinan
Date: Mon Oct  2 20:48:43 2017
New Revision: 1810599

URL: http://svn.apache.org/viewvc?rev=1810599&view=rev
Log:
ConstituencyParser add dot logging since initialization can take several seconds
TreeUtils added getTerminals(..) that accepts BaseTokens
MaxEntParserWrapper use uimafit index map instead of double select for speed
MarkableSalienceAnnotator added start, finish logging
MentionCluster.. added start, finish logging; still waiting on notYetProcess(jCas)

Modified:
    ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
    ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
    ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java

Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java?rev=1810599&r1=1810598&r2=1810599&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java Mon Oct  2 20:48:43 2017
@@ -24,17 +24,21 @@ import opennlp.tools.parser.ParserModel;
 import opennlp.tools.parser.chunking.Parser;
 import org.apache.ctakes.constituency.parser.util.TreeUtils;
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.log4j.Logger;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Map;
 
 public class MaxentParserWrapper implements ParserWrapper {
 
@@ -72,13 +76,17 @@ public class MaxentParserWrapper impleme
       logger.info( "Started processing: " + docId );
       // iterate over sentences
 		Parse parse = null;
-      final Collection<Sentence> allSentences = org.apache.uima.fit.util.JCasUtil.select( jcas, Sentence.class );
-      for ( Sentence sentence : allSentences ) {
+//      final Collection<Sentence> allSentences = org.apache.uima.fit.util.JCasUtil.select( jcas, Sentence.class );
+//      for ( Sentence sentence : allSentences ) {
+      final Map<Sentence, Collection<BaseToken>> sentenceTokenMap = JCasUtil.indexCovered( jcas, Sentence.class, BaseToken.class );
+      for ( Map.Entry<Sentence, Collection<BaseToken>> sentenceTokens : sentenceTokenMap.entrySet() ) {
+         final Sentence sentence = sentenceTokens.getKey();
          final String text = sentence.getCoveredText();
          if ( text.isEmpty() || isBorderOnly( text ) ) {
             continue;
          }
-         final FSArray terminalArray = TreeUtils.getTerminals( jcas, sentence );
+//         final FSArray terminalArray = TreeUtils.getTerminals( jcas, sentence );
+         final FSArray terminalArray = TreeUtils.getTerminals( jcas, new ArrayList<>( sentenceTokens.getValue() ) );
          final String tokenString = TreeUtils.getSplitSentence( terminalArray );
          if ( tokenString.isEmpty() ) {
             parse = null;

Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java?rev=1810599&r1=1810598&r2=1810599&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java Mon Oct  2 20:48:43 2017
@@ -22,6 +22,7 @@ import org.apache.ctakes.constituency.pa
 import org.apache.ctakes.constituency.parser.ParserWrapper;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
 import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.DotLogger;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -32,13 +33,13 @@ import org.apache.uima.fit.factory.Analy
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
 
-import java.io.FileNotFoundException;
+import java.io.IOException;
 
 
 @PipeBitInfo(
       name = "Constituency Parser",
-      description = ".",
-      dependencies = { PipeBitInfo.TypeProduct.DOCUMENT_ID, PipeBitInfo.TypeProduct.SENTENCE },
+		description = "Adds Terminal Treebank Nodes, necessary for Coreference Markables.",
+		dependencies = { PipeBitInfo.TypeProduct.DOCUMENT_ID, PipeBitInfo.TypeProduct.SENTENCE },
       products = { PipeBitInfo.TypeProduct.TREE_NODE }
 )
 public class ConstituencyParser extends JCasAnnotator_ImplBase {
@@ -49,24 +50,24 @@ public class ConstituencyParser extends
 			description = "File containing the opennlp-trained parser model",
 			mandatory = false,
 			defaultValue = "org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin"
-	) private String modelFilename;
+	)
+	private String modelFilename;
 	
 	
 	private ParserWrapper parser = null;
 	private Logger logger = Logger.getLogger(this.getClass());
 
 	@Override
-	public void initialize(UimaContext aContext)
-			throws ResourceInitializationException {
-		super.initialize(aContext);
-		try {
-			logger.info("Initializing parser...");		
-			parser = new MaxentParserWrapper(FileLocator.getAsStream(modelFilename));
-		} catch (FileNotFoundException e) {
-			e.printStackTrace();
-			logger.error("Error reading parser model file/directory: " + e.getMessage());
-			throw new ResourceInitializationException(e);
+	public void initialize( final UimaContext aContext ) throws ResourceInitializationException {
+		super.initialize( aContext );
+		logger.info( "Initializing ..." );
+		try ( DotLogger dotter = new DotLogger() ) {
+			parser = new MaxentParserWrapper( FileLocator.getAsStream( modelFilename ) );
+		} catch ( IOException ioE ) {
+			logger.error( "Error reading parser model file/directory: " + ioE.getMessage() );
+			throw new ResourceInitializationException( ioE );
 		}
+		logger.info( "Finished." );
 	}
 
 

Modified: ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java?rev=1810599&r1=1810598&r2=1810599&view=diff
==============================================================================
--- ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java (original)
+++ ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java Mon Oct  2 20:48:43 2017
@@ -287,36 +287,45 @@ final public class TreeUtils {
    public static FSArray getTerminals( final JCas jcas, final Sentence sentence ) {
       final List<BaseToken> baseTokens = org.apache.uima.fit.util.JCasUtil
             .selectCovered( jcas, BaseToken.class, sentence );
-      final List<BaseToken> wordList = new ArrayList<>();
-      for ( BaseToken baseToken : baseTokens ) {
-         if ( !(baseToken instanceof NewlineToken) ) {
-            wordList.add( baseToken );
-         }
-      }
-      final FSArray terminals = new FSArray( jcas, wordList.size() );
-      int termIndex = 0;
-      for ( BaseToken word : wordList ) {
-         final TerminalTreebankNode ttn = new TerminalTreebankNode( jcas, word.getBegin(), word.getEnd() );
-         ttn.setChildren( null );
-         ttn.setIndex( termIndex );
-         ttn.setTokenIndex( termIndex );
-         ttn.setLeaf( true );
-         ttn.setNodeTags( null );
-         final String wordText = word.getCoveredText();
-         if ( word instanceof PunctuationToken && BRACKET_MAP.containsKey( wordText ) ) {
-            ttn.setNodeValue( BRACKET_MAP.get( wordText ) );
-         } else {
-            ttn.setNodeValue( wordText );
-         }
+		return getTerminals( jcas, baseTokens );
+	}
+
+	/**
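+	 * @param jcas       the JCas in which the terminal treebank nodes and their FSArray are created
+	 * @param baseTokens base tokens in a window (usually a sentence); NewlineTokens are skipped
+	 * @return one TerminalTreebankNode per non-newline base token, in the order the tokens were given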
+	 */
+	public static FSArray getTerminals( final JCas jcas, final List<BaseToken> baseTokens ) {
+		final List<BaseToken> wordList = new ArrayList<>();
+		for ( BaseToken baseToken : baseTokens ) {
+			if ( !(baseToken instanceof NewlineToken) ) {
+				wordList.add( baseToken );
+			}
+		}
+		final FSArray terminals = new FSArray( jcas, wordList.size() );
+		int termIndex = 0;
+		for ( BaseToken word : wordList ) {
+			final TerminalTreebankNode ttn = new TerminalTreebankNode( jcas, word.getBegin(), word.getEnd() );
+			ttn.setChildren( null );
+			ttn.setIndex( termIndex );
+			ttn.setTokenIndex( termIndex );
+			ttn.setLeaf( true );
+			ttn.setNodeTags( null );
+			final String wordText = word.getCoveredText();
+			if ( word instanceof PunctuationToken && BRACKET_MAP.containsKey( wordText ) ) {
+				ttn.setNodeValue( BRACKET_MAP.get( wordText ) );
+			} else {
+				ttn.setNodeValue( wordText );
+			}
 //			ttn.addToIndexes();
-         terminals.set( termIndex, ttn );
-         termIndex++;
-      }
-      return terminals;
-   }
+			terminals.set( termIndex, ttn );
+			termIndex++;
+		}
+		return terminals;
+	}
 
 
-   public static String getSplitSentence( final FSArray terminalArray ) {
+	public static String getSplitSentence( final FSArray terminalArray ) {
 //		int offset = 0;  // what was this for?
       final StringBuilder sb = new StringBuilder();
       for ( int i = 0; i < terminalArray.size(); i++ ) {

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java?rev=1810599&r1=1810598&r2=1810599&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java Mon Oct  2 20:48:43 2017
@@ -6,6 +6,7 @@ import org.apache.ctakes.coreference.ae.
 import org.apache.ctakes.coreference.ae.features.salience.MorphosyntacticFeatureExtractor;
 import org.apache.ctakes.coreference.ae.features.salience.SemanticEnvironmentFeatureExtractor;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -37,6 +38,8 @@ import java.util.Map;
 )
 public class MarkableSalienceAnnotator extends CleartkAnnotator<Boolean> {
 
+  static private final Logger LOGGER = Logger.getLogger( "MarkableSalienceAnnotator" );
+
   List<FeatureExtractor1<Markable>> extractors = new ArrayList<>();
   
   public static AnalysisEngineDescription createDataWriterDescription(
@@ -62,19 +65,20 @@ public class MarkableSalienceAnnotator e
   }
   
   @Override
-  public void initialize(UimaContext context)
-      throws ResourceInitializationException {
-    super.initialize(context);
-    
-    extractors.add(new MorphosyntacticFeatureExtractor());
-    extractors.add(new GrammaticalRoleFeatureExtractor());
-    extractors.add(new SemanticEnvironmentFeatureExtractor());
-    extractors.add(new ClinicalFeatureExtractor());
+  public void initialize( final UimaContext context ) throws ResourceInitializationException {
+    LOGGER.info( "Initializing ..." );
+    super.initialize( context );
+
+    extractors.add( new MorphosyntacticFeatureExtractor() );
+    extractors.add( new GrammaticalRoleFeatureExtractor() );
+    extractors.add( new SemanticEnvironmentFeatureExtractor() );
+    extractors.add( new ClinicalFeatureExtractor() );
+    LOGGER.info( "Finished." );
   }
   
   @Override
   public void process(JCas jcas) throws AnalysisEngineProcessException {
-    
+    LOGGER.info( "Processing ..." );
     for(Markable markable : JCasUtil.select(jcas, Markable.class)){
       boolean outcome;
       List<Feature> features = new ArrayList<>();
@@ -92,5 +96,6 @@ public class MarkableSalienceAnnotator e
         markable.setConfidence(outcomes.get(true).floatValue());
       }
     }
+    LOGGER.info( "Finished." );
   }
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1810599&r1=1810598&r2=1810599&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Mon Oct  2 20:48:43 2017
@@ -205,14 +205,16 @@ public class MentionClusterCoreferenceAn
   }
    
   @Override
-  public void initialize(UimaContext context) throws ResourceInitializationException {
-    super.initialize(context);
-    
-    if(this.useExistingEncoders && classDataWriter != null){
+  public void initialize( final UimaContext context ) throws ResourceInitializationException {
+    LOGGER.info( "Initializing ..." );
+    super.initialize( context );
+
+    if ( this.useExistingEncoders && classDataWriter != null ) {
       this.dataWriter = classDataWriter;
-    }else if(this.isTraining()){
+    } else if ( this.isTraining() ) {
       classDataWriter = this.dataWriter;
     }
+    LOGGER.info( "Finished." );
   }
 
   public void notYetProcess( final JCas jCas ) throws AnalysisEngineProcessException {