You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/23 18:21:57 UTC

svn commit: r1589451 - /ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java

Author: tmill
Date: Wed Apr 23 16:21:57 2014
New Revision: 1589451

URL: http://svn.apache.org/r1589451
Log:
CTAKES-295: Updated to use uima-fit configuration parameters.

Modified:
    ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java

Modified: ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java?rev=1589451&r1=1589450&r2=1589451&view=diff
==============================================================================
--- ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java (original)
+++ ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java Wed Apr 23 16:21:57 2014
@@ -18,27 +18,25 @@
  */
 package org.apache.ctakes.chunker.ae;
 
-import java.io.File;
-import java.io.FileInputStream;
+import java.io.IOException;
 import java.io.InputStream;
-import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import opennlp.tools.chunker.ChunkerModel;
 // import opennlp.tools.lang.english.TreebankChunker; // no longer part of OpenNLP as of 1.5
 
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
-
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
 
 /**
  * This class provides a UIMA wrapper for the OpenNLP
@@ -58,7 +56,15 @@ public class Chunker extends JCasAnnotat
 	 * end with ".bin.gz" or ".txt". If this is not the case, then please see
 	 * resources/models/README.
 	 */
-	public static final String CHUNKER_MODEL_FILE_PARAM = "ChunkerModelFile";
+	public static final String CHUNKER_MODEL_FILE_PARAM = "ChunkerModelFile"; // keep for backwards compatibility
+	public static final String PARAM_CHUNKER_MODEL_FILE = CHUNKER_MODEL_FILE_PARAM;
+	@ConfigurationParameter(
+	    name = PARAM_CHUNKER_MODEL_FILE,
+	    mandatory = false,
+	    defaultValue = "org/apache/ctakes/chunker/models/chunker-model.zip",
+	    description = "Model file for OpenNLP chunker"
+	    )
+  private String chunkerModelPath;
 
 	/**
 	 * "ChunkCreatorClass" is a required, single, string parameter that
@@ -69,64 +75,59 @@ public class Chunker extends JCasAnnotat
 	 * @see DefaultChunkCreator
 	 * @see PhraseTypeChunkCreator
 	 */
-	public static final String CHUNKER_CREATOR_CLASS_PARAM = "ChunkCreatorClass";
+	public static final String CHUNKER_CREATOR_CLASS_PARAM = "ChunkCreatorClass"; // kept for backwards compatibility
+	public static final String PARAM_CHUNKER_CREATOR_CLASS = CHUNKER_CREATOR_CLASS_PARAM;
+	@ConfigurationParameter(
+	    name = PARAM_CHUNKER_CREATOR_CLASS,
+	    mandatory = false,
+	    defaultValue = "org.apache.ctakes.chunker.ae.DefaultChunkCreator",
+	    description = "The class that will create the chunks"
+	    )
+  String chunkerCreatorClassName;
 
 	private opennlp.tools.chunker.Chunker chunker;
 
 	ChunkCreator chunkerCreator;
 
-
-	public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
+	@Override
+  public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
 		super.initialize(uimaContext);
 
-		String chunkerModelPath = null;
-		try {
-			chunkerModelPath = (String) uimaContext.getConfigParameterValue(CHUNKER_MODEL_FILE_PARAM);
-			File chunkerModelFile = FileLocator.locateFile(chunkerModelPath);
-			InputStream fis = new FileInputStream(chunkerModelFile);
+    logger.info("Chunker model file: " + chunkerModelPath); 
+		try (InputStream fis = FileLocator.getAsStream(chunkerModelPath)) {
 			ChunkerModel model = new ChunkerModel(fis);
-			String chunkerModelAbsPath = chunkerModelFile.getAbsolutePath();
-			logger.info("Chunker model file: " + chunkerModelAbsPath); 
-									
 			chunker = new opennlp.tools.chunker.ChunkerME(model);
 
-			String chunkerCreatorClassName = (String) uimaContext.getConfigParameterValue(CHUNKER_CREATOR_CLASS_PARAM);
-
-			chunkerCreator = (ChunkCreator) Class.forName(chunkerCreatorClassName).newInstance();
-			chunkerCreator.initialize(uimaContext);
-
-		} catch (Exception e) {
+		} catch (IOException e) {
 			logger.info("Chunker model: " + chunkerModelPath); 
 			throw new ResourceInitializationException(e);
 		}
+		
+    try {
+      chunkerCreator = (ChunkCreator) Class.forName(chunkerCreatorClassName).newInstance();
+    } catch (InstantiationException | IllegalAccessException
+        | ClassNotFoundException e) {
+      logger.error("Error creating chunkerCreator from classname: " + chunkerCreatorClassName);
+      throw new ResourceInitializationException(e);
+    }
+    chunkerCreator.initialize(uimaContext);
 	}
 
-	public void process(JCas jCas) throws AnalysisEngineProcessException {
+	@Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
 
 		logger.info(" process(JCas)");
 
-		List<BaseToken> tokens = new ArrayList<BaseToken>();
-
-		AnnotationIndex baseTokenIndex = jCas.getAnnotationIndex(BaseToken.type);
-		FSIterator sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
-
-		while (sentences.hasNext()) {
-			Sentence sentence = (Sentence) sentences.next();
-
-			tokens.clear();
-
-			FSIterator tokenIterator = baseTokenIndex.subiterator(sentence);
-			while (tokenIterator.hasNext()) {
-				BaseToken token = (BaseToken) tokenIterator.next();
-				tokens.add(token);
-			}
-
-			String[] words = new String[tokens.size()];
-			String[] tags = new String[tokens.size()];
-			for (int i = 0; i < tokens.size(); i++) {
-				words[i] = tokens.get(i).getCoveredText();
-				tags[i] = tokens.get(i).getPartOfSpeech();
-			}
+		Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+		
+		for(Sentence sentence : sentences){
+	    List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, sentence);
+      String[] words = new String[tokens.size()];
+      String[] tags = new String[tokens.size()];
+      for(int i = 0; i < tokens.size(); i++){
+        words[i] = tokens.get(i).getCoveredText();
+        tags[i] = tokens.get(i).getPartOfSpeech();
+      }
 
 			String[] chunks = chunker.chunk(words, tags);
 
@@ -142,24 +143,24 @@ public class Chunker extends JCasAnnotat
 			// TreebankChunker.validOutcome()
 			// This code was directly modified from TreebankChunker.main()
 			for (int i = 0; i < chunks.length; i++) {
-			    
-			    if (i > 0 && !chunks[i].startsWith("I-")) { // && !chunks[i - 1].equals("O")) {
-				chunkEnd = tokens.get(i - 1).getEnd();
-				chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
-			    }
-			    
-			    if (chunks[i].startsWith("B-")) {
-				chunkBegin = tokens.get(i).getBegin();
-				chunkType = chunks[i].substring(2);
-			    } else if (chunks[i].equals("O")) { // O found  (her_PRP$ ear_O)
-				chunkBegin = tokens.get(i).getBegin();
-				chunkType = chunks[i];
-			    
-			    }
+
+			  if (i > 0 && !chunks[i].startsWith("I-")) { // && !chunks[i - 1].equals("O")) {
+			    chunkEnd = tokens.get(i - 1).getEnd();
+			    chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
+			  }
+
+			  if (chunks[i].startsWith("B-")) {
+			    chunkBegin = tokens.get(i).getBegin();
+			    chunkType = chunks[i].substring(2);
+			  } else if (chunks[i].equals("O")) { // O found  (her_PRP$ ear_O)
+			    chunkBegin = tokens.get(i).getBegin();
+			    chunkType = chunks[i];
+
+			  }
 			}
 			if (chunks.length > 0 && !chunks[chunks.length - 1].equals("O")) {
-				chunkEnd = tokens.get(chunks.length - 1).getEnd();
-				chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
+			  chunkEnd = tokens.get(chunks.length - 1).getEnd();
+			  chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
 			}
 		}
 	}