You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/17 18:41:14 UTC
svn commit: r1588305 -
/ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java
Author: tmill
Date: Thu Apr 17 16:41:14 2014
New Revision: 1588305
URL: http://svn.apache.org/r1588305
Log:
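CTAKES-197: Java 7 updates to the LVG Annotator: mainly generic type parameters and the diamond operator, @Override annotations, and try-with-resources for the cache file readers. Also assigns the useLemmaCache and lemmaCacheFreqCutoff fields instead of same-named local variables that shadowed them.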
Modified:
ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java
Modified: ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java?rev=1588305&r1=1588304&r2=1588305&view=diff
==============================================================================
--- ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java (original)
+++ ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java Thu Apr 17 16:41:14 2014
@@ -23,6 +23,7 @@ import org.apache.ctakes.lvg.resource.Lv
import org.apache.ctakes.typesystem.type.syntax.Lemma;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
+
import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
import gov.nih.nlm.nls.lvg.Lib.Category;
@@ -99,13 +100,13 @@ public class LvgAnnotator extends JCasAn
private boolean useSegments;
- private Set skipSegmentsSet;
+ private Set<String> skipSegmentsSet;
private boolean useCmdCache;
private String cmdCacheFileLocation;
private int cmdCacheFreqCutoff;
- private Map xeroxTreebankMap;
+ private Map<String, String> xeroxTreebankMap;
private boolean postLemmas;
private boolean useLemmaCache;
@@ -113,12 +114,12 @@ public class LvgAnnotator extends JCasAn
private int lemmaCacheFreqCutoff;
// key = word, value = canonical word
- private Map normCacheMap;
+ private Map<String, String> normCacheMap;
// key = word, value = Set of Lemma objects
- private Map lemmaCacheMap;
+ private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;
- private Set exclusionSet;
+ private Set<String> exclusionSet;
/**
* Performs initialization logic. This implementation just reads values for
@@ -126,7 +127,8 @@ public class LvgAnnotator extends JCasAn
*
* @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(AnnotatorContext)
*/
- public void initialize(UimaContext aContext)
+ @Override
+ public void initialize(UimaContext aContext)
throws ResourceInitializationException {
super.initialize(aContext);
@@ -172,7 +174,7 @@ public class LvgAnnotator extends JCasAn
.booleanValue();
String[] skipSegmentIDs = (String[]) context
.getConfigParameterValue("SegmentsToSkip");
- skipSegmentsSet = new HashSet();
+ skipSegmentsSet = new HashSet<>();
for (int i = 0; i < skipSegmentIDs.length; i++) {
skipSegmentsSet.add(skipSegmentIDs[i]);
}
@@ -180,7 +182,7 @@ public class LvgAnnotator extends JCasAn
// Load Xerox Treebank tagset map
String xtMaps[] = (String[]) context
.getConfigParameterValue("XeroxTreebankMap");
- xeroxTreebankMap = new HashMap();
+ xeroxTreebankMap = new HashMap<>();
for (int i = 0; i < xtMaps.length; i++) {
StringTokenizer tokenizer = new StringTokenizer(xtMaps[i], "|");
if (tokenizer.countTokens() == 2) {
@@ -201,7 +203,7 @@ public class LvgAnnotator extends JCasAn
String[] wordsToExclude = (String[]) context
.getConfigParameterValue("ExclusionSet");
- exclusionSet = new HashSet();
+ exclusionSet = new HashSet<>();
for (int i = 0; i < wordsToExclude.length; i++) {
exclusionSet.add(wordsToExclude[i]);
}
@@ -210,10 +212,10 @@ public class LvgAnnotator extends JCasAn
.getConfigParameterValue(PARAM_POST_LEMMAS);
postLemmas = bPostLemmas == null ? false : bPostLemmas.booleanValue();
if (postLemmas) {
- Boolean useLemmaCache = (Boolean) context
+ Boolean useLemmaCacheParam = (Boolean) context
.getConfigParameterValue(PARAM_USE_LEMMA_CACHE);
- useLemmaCache = useLemmaCache == null ? false : useLemmaCache
- .booleanValue();
+ this.useLemmaCache = (useLemmaCacheParam == null ? false : useLemmaCacheParam
+ .booleanValue());
if (useLemmaCache) {
lemmaCacheFileLocation = (String) context
.getConfigParameterValue(PARAM_LEMMA_CACHE_FILE_LOCATION);
@@ -221,12 +223,12 @@ public class LvgAnnotator extends JCasAn
throw new ResourceInitializationException(new Exception(
"Parameter for " + PARAM_LEMMA_CACHE_FILE_LOCATION
+ " was not set."));
- Integer lemmaCacheFreqCutoff = (Integer) context
+ Integer lemmaCacheFreqCutoffParam = (Integer) context
.getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
- if (lemmaCacheFreqCutoff == null)
- lemmaCacheFreqCutoff = 20;
+ if (lemmaCacheFreqCutoffParam == null)
+ this.lemmaCacheFreqCutoff = 20;
else
- lemmaCacheFreqCutoff = lemmaCacheFreqCutoff.intValue();
+ this.lemmaCacheFreqCutoff = lemmaCacheFreqCutoffParam.intValue();
}
}
}
@@ -234,7 +236,8 @@ public class LvgAnnotator extends JCasAn
/**
* Invokes this annotator's analysis logic.
*/
- public void process(JCas jcas)
+ @Override
+ public void process(JCas jcas)
throws AnalysisEngineProcessException {
logger.info("process(JCas)");
@@ -244,7 +247,7 @@ public class LvgAnnotator extends JCasAn
try {
if (useSegments) {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator segmentItr = indexes.getAnnotationIndex(Segment.type)
+ Iterator<?> segmentItr = indexes.getAnnotationIndex(Segment.type)
.iterator();
while (segmentItr.hasNext()) {
Segment segmentAnnotation = (Segment) segmentItr.next();
@@ -273,7 +276,7 @@ public class LvgAnnotator extends JCasAn
int rangeEnd)
throws AnalysisEngineProcessException {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator wordItr = indexes.getAnnotationIndex(WordToken.type)
+ Iterator<?> wordItr = indexes.getAnnotationIndex(WordToken.type)
.iterator();
while (wordItr.hasNext()) {
WordToken wordAnnotation = (WordToken) wordItr.next();
@@ -305,7 +308,7 @@ public class LvgAnnotator extends JCasAn
// apply LVG processing to get canonical form
String canonicalForm = null;
if (useCmdCache) {
- canonicalForm = (String) normCacheMap.get(word);
+ canonicalForm = normCacheMap.get(word);
if (canonicalForm == null) {
// logger.info("["+ word+ "] was not found in LVG norm cache.");
}
@@ -337,16 +340,16 @@ public class LvgAnnotator extends JCasAn
throws AnalysisEngineProcessException {
// apply LVG processing to get lemmas
// key = lemma string, value = Set of POS tags
- Map lemmaMap = null;
+ Map<String, Set<String>> lemmaMap = null;
if (useLemmaCache) {
- Set lemmaSet = (Set) lemmaCacheMap.get(word);
+ Set<?> lemmaSet = lemmaCacheMap.get(word);
if (lemmaSet == null) {
// logger.info("["+ word+
// "] was not found in LVG lemma cache.");
} else {
- lemmaMap = new HashMap();
- Iterator lemmaItr = lemmaSet.iterator();
+ lemmaMap = new HashMap<>();
+ Iterator<?> lemmaItr = lemmaSet.iterator();
while (lemmaItr.hasNext()) {
LemmaLocalClass l = (LemmaLocalClass) lemmaItr.next();
lemmaMap.put(l.word, l.posSet);
@@ -355,10 +358,10 @@ public class LvgAnnotator extends JCasAn
}
if (lemmaMap == null) {
- lemmaMap = new HashMap();
+ lemmaMap = new HashMap<>();
try {
- Vector lexItems = lvgLexItem.MutateLexItem(word);
- Iterator lexItemItr = lexItems.iterator();
+ Vector<?> lexItems = lvgLexItem.MutateLexItem(word);
+ Iterator<?> lexItemItr = lexItems.iterator();
while (lexItemItr.hasNext()) {
LexItem li = (LexItem) lexItemItr.next();
@@ -369,14 +372,14 @@ public class LvgAnnotator extends JCasAn
// note that POS is Xerox tagset
String lemmaPos = Category.ToName(bitValues[i]);
// convert Xerox tagset to PennTreebank tagset
- String treebankTag = (String) xeroxTreebankMap
+ String treebankTag = xeroxTreebankMap
.get(lemmaPos);
if (treebankTag != null) {
- Set posSet = null;
+ Set<String> posSet = null;
if (lemmaMap.containsKey(lemmaStr)) {
- posSet = (Set) lemmaMap.get(lemmaStr);
+ posSet = lemmaMap.get(lemmaStr);
} else {
- posSet = new HashSet();
+ posSet = new HashSet<>();
}
posSet.add(treebankTag);
lemmaMap.put(lemmaStr, posSet);
@@ -390,13 +393,13 @@ public class LvgAnnotator extends JCasAn
// add lemma information to CAS
// FSArray lemmas = new FSArray(jcas, lemmaMap.keySet().size());
- Collection lemmas = new ArrayList(lemmaMap.keySet().size());
+ Collection<Lemma> lemmas = new ArrayList<>(lemmaMap.keySet().size());
- Iterator lemmaStrItr = lemmaMap.keySet().iterator();
+ Iterator<String> lemmaStrItr = lemmaMap.keySet().iterator();
while (lemmaStrItr.hasNext()) {
- String form = (String) lemmaStrItr.next();
- Set posTagSet = (Set) lemmaMap.get(form);
- Iterator posTagItr = posTagSet.iterator();
+ String form = lemmaStrItr.next();
+ Set<?> posTagSet = lemmaMap.get(form);
+ Iterator<?> posTagItr = posTagSet.iterator();
while (posTagItr.hasNext()) {
String pos = (String) posTagItr.next(); // part of speech
Lemma lemma = new Lemma(jcas);
@@ -405,7 +408,7 @@ public class LvgAnnotator extends JCasAn
lemmas.add(lemma);
}
}
- Lemma[] lemmaArray = (Lemma[]) lemmas.toArray(new Lemma[lemmas.size()]);
+ Lemma[] lemmaArray = lemmas.toArray(new Lemma[lemmas.size()]);
FSList fsList = ListFactory.buildList(jcas, lemmaArray);
wordAnnotation.setLemmaEntries(fsList);
}
@@ -417,37 +420,36 @@ public class LvgAnnotator extends JCasAn
*/
private void loadCmdCacheFile(String cpLocation)
throws FileNotFoundException, IOException {
- InputStream inStream = getClass().getResourceAsStream(cpLocation);
- if (inStream == null) {
- throw new FileNotFoundException("Unable to find: " + cpLocation);
- }
- BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
-
- // initialize map
- normCacheMap = new HashMap();
-
- String line = br.readLine();
- while (line != null) {
- StringTokenizer st = new StringTokenizer(line, "|");
- if (st.countTokens() == 7) {
- int freq = Integer.parseInt(st.nextToken());
- if (freq > cmdCacheFreqCutoff) {
- String origWord = st.nextToken();
- String normWord = st.nextToken();
- if (!normCacheMap.containsKey(origWord)) {
- // if there are duplicates, then only have the first
- // occurrence in the map
- normCacheMap.put(origWord, normWord);
- }
- } else {
- logger.debug("Discarding norm cache line due to frequency cutoff: "
- + line);
- }
- } else {
- logger.warn("Invalid LVG norm cache " + "line: " + line);
- }
- line = br.readLine();
- }
+ try(
+ InputStream inStream = getClass().getResourceAsStream(cpLocation);
+ BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
+ ){
+ // initialize map
+ normCacheMap = new HashMap<>();
+
+ String line = br.readLine();
+ while (line != null) {
+ StringTokenizer st = new StringTokenizer(line, "|");
+ if (st.countTokens() == 7) {
+ int freq = Integer.parseInt(st.nextToken());
+ if (freq > cmdCacheFreqCutoff) {
+ String origWord = st.nextToken();
+ String normWord = st.nextToken();
+ if (!normCacheMap.containsKey(origWord)) {
+ // if there are duplicates, then only have the first
+ // occurrence in the map
+ normCacheMap.put(origWord, normWord);
+ }
+ } else {
+ logger.debug("Discarding norm cache line due to frequency cutoff: "
+ + line);
+ }
+ } else {
+ logger.warn("Invalid LVG norm cache " + "line: " + line);
+ }
+ line = br.readLine();
+ }
+ }
}
/**
@@ -457,63 +459,62 @@ public class LvgAnnotator extends JCasAn
*/
private void loadLemmaCacheFile(String cpLocation)
throws FileNotFoundException, IOException {
- InputStream inStream = getClass().getResourceAsStream(cpLocation);
- if (inStream == null) {
- throw new FileNotFoundException("Unable to find: " + cpLocation);
- }
- BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
-
- // initialize map
- lemmaCacheMap = new HashMap();
-
- String line = br.readLine();
- while (line != null) {
- StringTokenizer st = new StringTokenizer(line, "|");
- if (st.countTokens() == 4) // JZ: changed from 7 to 4 as used a new
- // dictionary
- {
- int freq = Integer.parseInt(st.nextToken());
- if (freq > lemmaCacheFreqCutoff) {
- String origWord = st.nextToken();
- String lemmaWord = st.nextToken();
- String combinedCategories = st.nextToken();
-
- // strip < and > chars
- combinedCategories = combinedCategories.substring(1,
- combinedCategories.length() - 1);
-
- // construct Lemma object
- LemmaLocalClass l = new LemmaLocalClass();
- l.word = lemmaWord;
- l.posSet = new HashSet();
- long bitVector = Category.ToValue(combinedCategories);
- long[] bitValues = Category.ToValuesArray(bitVector);
- for (int i = 0; i < bitValues.length; i++) {
- String pos = Category.ToName(bitValues[i]);
- // convert Xerox tag into Treebank
- String treebankTag = (String) xeroxTreebankMap.get(pos);
- if (treebankTag != null) {
- l.posSet.add(treebankTag);
- }
- }
-
- // add Lemma to cache map
- Set lemmaSet = null;
- if (!lemmaCacheMap.containsKey(origWord)) {
- lemmaSet = new HashSet();
- } else {
- lemmaSet = (Set) lemmaCacheMap.get(origWord);
- }
- lemmaSet.add(l);
- lemmaCacheMap.put(origWord, lemmaSet);
- } else {
- logger.debug("Discarding lemma cache line due to frequency cutoff: "
- + line);
- }
- } else {
- logger.warn("Invalid LVG lemma cache " + "line: " + line);
- }
- line = br.readLine();
+ try(
+ InputStream inStream = getClass().getResourceAsStream(cpLocation);
+ BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
+ ){
+ // initialize map
+ lemmaCacheMap = new HashMap<>();
+
+ String line = br.readLine();
+ while (line != null) {
+ StringTokenizer st = new StringTokenizer(line, "|");
+ if (st.countTokens() == 4) // JZ: changed from 7 to 4 as used a new
+ // dictionary
+ {
+ int freq = Integer.parseInt(st.nextToken());
+ if (freq > lemmaCacheFreqCutoff) {
+ String origWord = st.nextToken();
+ String lemmaWord = st.nextToken();
+ String combinedCategories = st.nextToken();
+
+ // strip < and > chars
+ combinedCategories = combinedCategories.substring(1,
+ combinedCategories.length() - 1);
+
+ // construct Lemma object
+ LemmaLocalClass l = new LemmaLocalClass();
+ l.word = lemmaWord;
+ l.posSet = new HashSet<>();
+ long bitVector = Category.ToValue(combinedCategories);
+ long[] bitValues = Category.ToValuesArray(bitVector);
+ for (int i = 0; i < bitValues.length; i++) {
+ String pos = Category.ToName(bitValues[i]);
+ // convert Xerox tag into Treebank
+ String treebankTag = xeroxTreebankMap.get(pos);
+ if (treebankTag != null) {
+ l.posSet.add(treebankTag);
+ }
+ }
+
+ // add Lemma to cache map
+ Set<LemmaLocalClass> lemmaSet = null;
+ if (!lemmaCacheMap.containsKey(origWord)) {
+ lemmaSet = new HashSet<>();
+ } else {
+ lemmaSet = lemmaCacheMap.get(origWord);
+ }
+ lemmaSet.add(l);
+ lemmaCacheMap.put(origWord, lemmaSet);
+ } else {
+ logger.debug("Discarding lemma cache line due to frequency cutoff: "
+ + line);
+ }
+ } else {
+ logger.warn("Invalid LVG lemma cache " + "line: " + line);
+ }
+ line = br.readLine();
+ }
}
}
@@ -525,7 +526,7 @@ public class LvgAnnotator extends JCasAn
class LemmaLocalClass {
public String word;
- public Set posSet;
+ public Set<String> posSet;
}
}
\ No newline at end of file