You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/23 18:21:57 UTC
svn commit: r1589451 - /ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java
Author: tmill
Date: Wed Apr 23 16:21:57 2014
New Revision: 1589451
URL: http://svn.apache.org/r1589451
Log:
CTAKES-295: Updated to use uima-fit configuration parameters.
Modified:
ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java
Modified: ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java?rev=1589451&r1=1589450&r2=1589451&view=diff
==============================================================================
--- ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java (original)
+++ ctakes/trunk/ctakes-chunker/src/main/java/org/apache/ctakes/chunker/ae/Chunker.java Wed Apr 23 16:21:57 2014
@@ -18,27 +18,25 @@
*/
package org.apache.ctakes.chunker.ae;
-import java.io.File;
-import java.io.FileInputStream;
+import java.io.IOException;
import java.io.InputStream;
-import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import opennlp.tools.chunker.ChunkerModel;
// import opennlp.tools.lang.english.TreebankChunker; // no longer part of OpenNLP as of 1.5
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
-
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
/**
* This class provides a UIMA wrapper for the OpenNLP
@@ -58,7 +56,15 @@ public class Chunker extends JCasAnnotat
* end with ".bin.gz" or ".txt". If this is not the case, then please see
* resources/models/README.
*/
- public static final String CHUNKER_MODEL_FILE_PARAM = "ChunkerModelFile";
+ public static final String CHUNKER_MODEL_FILE_PARAM = "ChunkerModelFile"; // keep for backwards compatibility
+ public static final String PARAM_CHUNKER_MODEL_FILE = CHUNKER_MODEL_FILE_PARAM;
+ @ConfigurationParameter(
+ name = PARAM_CHUNKER_MODEL_FILE,
+ mandatory = false,
+ defaultValue = "org/apache/ctakes/chunker/models/chunker-model.zip",
+ description = "Model file for OpenNLP chunker"
+ )
+ private String chunkerModelPath;
/**
* "ChunkCreatorClass" is a required, single, string parameter that
@@ -69,64 +75,59 @@ public class Chunker extends JCasAnnotat
* @see DefaultChunkCreator
* @see PhraseTypeChunkCreator
*/
- public static final String CHUNKER_CREATOR_CLASS_PARAM = "ChunkCreatorClass";
+ public static final String CHUNKER_CREATOR_CLASS_PARAM = "ChunkCreatorClass"; // kept for backwards compatibility
+ public static final String PARAM_CHUNKER_CREATOR_CLASS = CHUNKER_CREATOR_CLASS_PARAM;
+ @ConfigurationParameter(
+ name = PARAM_CHUNKER_CREATOR_CLASS,
+ mandatory = false,
+ defaultValue = "org.apache.ctakes.chunker.ae.DefaultChunkCreator",
+ description = "The class that will create the chunks"
+ )
+ String chunkerCreatorClassName;
private opennlp.tools.chunker.Chunker chunker;
ChunkCreator chunkerCreator;
-
- public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
+ @Override
+ public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
super.initialize(uimaContext);
- String chunkerModelPath = null;
- try {
- chunkerModelPath = (String) uimaContext.getConfigParameterValue(CHUNKER_MODEL_FILE_PARAM);
- File chunkerModelFile = FileLocator.locateFile(chunkerModelPath);
- InputStream fis = new FileInputStream(chunkerModelFile);
+ logger.info("Chunker model file: " + chunkerModelPath);
+ try (InputStream fis = FileLocator.getAsStream(chunkerModelPath)) {
ChunkerModel model = new ChunkerModel(fis);
- String chunkerModelAbsPath = chunkerModelFile.getAbsolutePath();
- logger.info("Chunker model file: " + chunkerModelAbsPath);
-
chunker = new opennlp.tools.chunker.ChunkerME(model);
- String chunkerCreatorClassName = (String) uimaContext.getConfigParameterValue(CHUNKER_CREATOR_CLASS_PARAM);
-
- chunkerCreator = (ChunkCreator) Class.forName(chunkerCreatorClassName).newInstance();
- chunkerCreator.initialize(uimaContext);
-
- } catch (Exception e) {
+ } catch (IOException e) {
logger.info("Chunker model: " + chunkerModelPath);
throw new ResourceInitializationException(e);
}
+
+ try {
+ chunkerCreator = (ChunkCreator) Class.forName(chunkerCreatorClassName).newInstance();
+ } catch (InstantiationException | IllegalAccessException
+ | ClassNotFoundException e) {
+ logger.error("Error creating chunkerCreator from classname: " + chunkerCreatorClassName);
+ throw new ResourceInitializationException(e);
+ }
+ chunkerCreator.initialize(uimaContext);
}
- public void process(JCas jCas) throws AnalysisEngineProcessException {
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
logger.info(" process(JCas)");
- List<BaseToken> tokens = new ArrayList<BaseToken>();
-
- AnnotationIndex baseTokenIndex = jCas.getAnnotationIndex(BaseToken.type);
- FSIterator sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
-
- while (sentences.hasNext()) {
- Sentence sentence = (Sentence) sentences.next();
-
- tokens.clear();
-
- FSIterator tokenIterator = baseTokenIndex.subiterator(sentence);
- while (tokenIterator.hasNext()) {
- BaseToken token = (BaseToken) tokenIterator.next();
- tokens.add(token);
- }
-
- String[] words = new String[tokens.size()];
- String[] tags = new String[tokens.size()];
- for (int i = 0; i < tokens.size(); i++) {
- words[i] = tokens.get(i).getCoveredText();
- tags[i] = tokens.get(i).getPartOfSpeech();
- }
+ Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+
+ for(Sentence sentence : sentences){
+ List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, sentence);
+ String[] words = new String[tokens.size()];
+ String[] tags = new String[tokens.size()];
+ for(int i = 0; i < tokens.size(); i++){
+ words[i] = tokens.get(i).getCoveredText();
+ tags[i] = tokens.get(i).getPartOfSpeech();
+ }
String[] chunks = chunker.chunk(words, tags);
@@ -142,24 +143,24 @@ public class Chunker extends JCasAnnotat
// TreebankChunker.validOutcome()
// This code was directly modified from TreebankChunker.main()
for (int i = 0; i < chunks.length; i++) {
-
- if (i > 0 && !chunks[i].startsWith("I-")) { // && !chunks[i - 1].equals("O")) {
- chunkEnd = tokens.get(i - 1).getEnd();
- chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
- }
-
- if (chunks[i].startsWith("B-")) {
- chunkBegin = tokens.get(i).getBegin();
- chunkType = chunks[i].substring(2);
- } else if (chunks[i].equals("O")) { // O found (her_PRP$ ear_O)
- chunkBegin = tokens.get(i).getBegin();
- chunkType = chunks[i];
-
- }
+
+ if (i > 0 && !chunks[i].startsWith("I-")) { // && !chunks[i - 1].equals("O")) {
+ chunkEnd = tokens.get(i - 1).getEnd();
+ chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
+ }
+
+ if (chunks[i].startsWith("B-")) {
+ chunkBegin = tokens.get(i).getBegin();
+ chunkType = chunks[i].substring(2);
+ } else if (chunks[i].equals("O")) { // O found (her_PRP$ ear_O)
+ chunkBegin = tokens.get(i).getBegin();
+ chunkType = chunks[i];
+
+ }
}
if (chunks.length > 0 && !chunks[chunks.length - 1].equals("O")) {
- chunkEnd = tokens.get(chunks.length - 1).getEnd();
- chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
+ chunkEnd = tokens.get(chunks.length - 1).getEnd();
+ chunkerCreator.createChunk(jCas, chunkBegin, chunkEnd, chunkType);
}
}
}