You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/07/09 18:12:44 UTC

svn commit: r1501362 - /ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java

Author: chenpei
Date: Tue Jul  9 16:12:44 2013
New Revision: 1501362

URL: http://svn.apache.org/r1501362
Log:
CTAKES-97 - While working on the POSTagger code, removed the code that generated the Java warnings (imports, etc.) and simplified the model loading.

Modified:
    ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java

Modified: ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java?rev=1501362&r1=1501361&r2=1501362&view=diff
==============================================================================
--- ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java (original)
+++ ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/POSTagger.java Tue Jul  9 16:12:44 2013
@@ -42,8 +42,6 @@
  * The following changes have been made:
  * - import of different sentence and token types.
  * - removed original comments
- * - added TAG_DICIONARY_PARAM
- * - added CASE_SENSITIVE_PARAM
  * - typed the collections used in process
  * - throws an exception instead of printing out an error message.
  * 
@@ -52,16 +50,12 @@
 
 package org.apache.ctakes.postagger;
 
-import java.io.File;
-import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.List;
 
-//import opennlp.tools.lang.english.PosTagger;
-import opennlp.model.AbstractModel;
-import opennlp.tools.postag.POSDictionary;
 import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.TagDictionary;
 
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
@@ -88,29 +82,35 @@ public class POSTagger extends JCasAnnot
 	 * resources/models/README.
 	 */
 	public static final String POS_MODEL_FILE_PARAM = "PosModelFile";
-
-	public static final String CASE_SENSITIVE_PARAM = "CaseSensitive";
-
 	private opennlp.tools.postag.POSTaggerME tagger;
 
-	public void initialize(UimaContext uimaContext)	throws ResourceInitializationException {
+	@Override
+	public void initialize(UimaContext uimaContext)
+			throws ResourceInitializationException {
 		super.initialize(uimaContext);
 
 		String posModelPath = null;
+		InputStream fis = null;
 
 		try {
-			posModelPath = (String) uimaContext.getConfigParameterValue(POS_MODEL_FILE_PARAM);
-			File posModelFile = FileLocator.locateFile(posModelPath);
-			String modelFileAbsPath = posModelFile.getAbsolutePath();
-			logger.info("POS tagger model file: " + modelFileAbsPath);
-
-			FileInputStream fis = new FileInputStream(posModelFile);
-			POSModel modelFile = new POSModel(fis); // skip using the tag dictionary for now since OpenNLP (1.5) changed
-			tagger = new opennlp.tools.postag.POSTaggerME(modelFile); //, tagDictionary);
-
+			posModelPath = (String) uimaContext
+					.getConfigParameterValue(POS_MODEL_FILE_PARAM);
+			logger.info("POS tagger model file: " + posModelPath);
+			fis = FileLocator.getAsStream(posModelPath);
+			POSModel modelFile = new POSModel(fis);
+			tagger = new opennlp.tools.postag.POSTaggerME(modelFile);
+			fis.close();
 		} catch (Exception e) {
-			logger.info("POS tagger model: " + posModelPath);
+			logger.info("Error loading POS tagger model: " + posModelPath);
 			throw new ResourceInitializationException(e);
+		} finally {
+			try {
+				if (fis != null) {
+					fis.close();
+				}
+			} catch (IOException e) {
+				throw new ResourceInitializationException(e);
+			}
 		}
 	}
 
@@ -121,9 +121,11 @@ public class POSTagger extends JCasAnnot
 		List<BaseToken> tokens = new ArrayList<BaseToken>();
 		List<String> words = new ArrayList<String>();
 
-		AnnotationIndex baseTokenIndex = jCas.getAnnotationIndex(BaseToken.type);
+		AnnotationIndex baseTokenIndex = jCas
+				.getAnnotationIndex(BaseToken.type);
 
-		FSIterator sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
+		FSIterator sentences = jCas.getAnnotationIndex(Sentence.type)
+				.iterator();
 
 		while (sentences.hasNext()) {
 			Sentence sentence = (Sentence) sentences.next();
@@ -143,8 +145,9 @@ public class POSTagger extends JCasAnnot
 				wordTagList = tagger.tag(words);
 			}
 			// else {
-			// 	logger.info("sentence has no words = '" + sentence.getCoveredText()
-			// 		+ "' at (" +sentence.getBegin() + "," + sentence.getEnd() + ")");
+			// logger.info("sentence has no words = '" +
+			// sentence.getCoveredText()
+			// + "' at (" +sentence.getBegin() + "," + sentence.getEnd() + ")");
 			// }
 
 			try {
@@ -155,7 +158,8 @@ public class POSTagger extends JCasAnnot
 				}
 			} catch (IndexOutOfBoundsException e) {
 				throw new AnalysisEngineProcessException(
-						"sentence being tagged is: '" + sentence.getCoveredText() + "'", null, e);
+						"sentence being tagged is: '"
+								+ sentence.getCoveredText() + "'", null, e);
 			}
 		}
 	}