You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by vj...@apache.org on 2014/04/28 04:44:40 UTC
svn commit: r1590550 - in /ctakes/trunk: ctakes-assertion-zoner/
ctakes-assertion/
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/
ctakes-clinical-pipeline/ ctakes-core/ ctakes-dependency-parser/
ctakes-dependency-parser/s...
Author: vjapache
Date: Mon Apr 28 02:44:39 2014
New Revision: 1590550
URL: http://svn.apache.org/r1590550
Log:
#CTAKES-253, CTAKES-292 ytex merge
Added:
ctakes/trunk/ctakes-distribution/src/main/bin/ant
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/ant
ctakes/trunk/ctakes-distribution/src/main/bin/ant.bat
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/ant.bat
ctakes/trunk/ctakes-distribution/src/main/bin/ctakes.profile
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/ctakes.profile
ctakes/trunk/ctakes-distribution/src/main/bin/setenv.bat
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/setenv.bat
ctakes/trunk/ctakes-distribution/src/main/bin/ytexweb.bat
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/ytexweb.bat
ctakes/trunk/ctakes-distribution/src/main/bin/ytexweb.sh
- copied unchanged from r1590545, ctakes/branches/ytex/ctakes-distribution/src/main/bin/ytexweb.sh
Modified:
ctakes/trunk/ctakes-assertion-zoner/.classpath
ctakes/trunk/ctakes-assertion-zoner/pom.xml
ctakes/trunk/ctakes-assertion/pom.xml
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/CharacterOffsetToLineTokenConverterCtakesImpl.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/SingleDocumentProcessorCtakes.java
ctakes/trunk/ctakes-clinical-pipeline/ (props changed)
ctakes/trunk/ctakes-clinical-pipeline/pom.xml
ctakes/trunk/ctakes-core/pom.xml
ctakes/trunk/ctakes-dependency-parser/pom.xml
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java
ctakes/trunk/ctakes-distribution/pom.xml
ctakes/trunk/ctakes-distribution/src/main/assembly/bin.xml
ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCPE.bat
ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCVD.bat
ctakes/trunk/ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java
Modified: ctakes/trunk/ctakes-assertion-zoner/.classpath
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-zoner/.classpath?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion-zoner/.classpath (original)
+++ ctakes/trunk/ctakes-assertion-zoner/.classpath Mon Apr 28 02:44:39 2014
@@ -17,12 +17,6 @@
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
- <classpathentry kind="src" output="target/classes" path="target/generated-sources/jcasgen">
- <attributes>
- <attribute name="optional" value="true"/>
- <attribute name="maven.pomderived" value="true"/>
- </attributes>
- </classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
<attributes>
<attribute name="maven.pomderived" value="true"/>
Modified: ctakes/trunk/ctakes-assertion-zoner/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-zoner/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion-zoner/pom.xml (original)
+++ ctakes/trunk/ctakes-assertion-zoner/pom.xml Mon Apr 28 02:44:39 2014
@@ -46,7 +46,12 @@
<groupId>org.uimafit</groupId>
<artifactId>uimafit</artifactId>
</dependency>
- <dependency>
+ <!-- we excluded spring-context uimafit transitive dependency in the parent pom, include it here -->
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-context</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.ctakes</groupId>
<artifactId>ctakes-core</artifactId>
</dependency>
Modified: ctakes/trunk/ctakes-assertion/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/pom.xml (original)
+++ ctakes/trunk/ctakes-assertion/pom.xml Mon Apr 28 02:44:39 2014
@@ -159,6 +159,11 @@
<groupId>org.uimafit</groupId>
<artifactId>uimafit</artifactId>
</dependency>
+ <!-- we excluded spring-context uimafit transitive dependency in the parent pom, include it here -->
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-context</artifactId>
+ </dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/CharacterOffsetToLineTokenConverterCtakesImpl.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/CharacterOffsetToLineTokenConverterCtakesImpl.java?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/CharacterOffsetToLineTokenConverterCtakesImpl.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/CharacterOffsetToLineTokenConverterCtakesImpl.java Mon Apr 28 02:44:39 2014
@@ -19,11 +19,13 @@
package org.apache.ctakes.assertion.medfacts.i2b2.api;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
-import org.apache.log4j.Logger;
+import org.apache.log4j.Logger;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationIndex;
@@ -32,8 +34,8 @@ import org.apache.uima.jcas.tcas.Annotat
import org.mitre.medfacts.i2b2.api.ApiConcept;
import org.mitre.medfacts.zoner.CharacterOffsetToLineTokenConverter;
import org.mitre.medfacts.zoner.LineAndTokenPosition;
-
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
public class CharacterOffsetToLineTokenConverterCtakesImpl implements CharacterOffsetToLineTokenConverter
@@ -43,6 +45,7 @@ public class CharacterOffsetToLineTokenC
protected TreeMap<Integer, Sentence> beginTreeMap;
protected TreeSet<Integer> tokenBeginEndTreeSet;
+ protected Map<Sentence, List<BaseToken>> sentenceToTokenNumberMap;
public CharacterOffsetToLineTokenConverterCtakesImpl()
{
@@ -54,6 +57,7 @@ public class CharacterOffsetToLineTokenC
this.jcas = jcas;
buildSentenceBoundaryMap();
buildTokenBoundaryMap();
+ buildSentenceToTokenNumberMap();
}
public void buildSentenceBoundaryMap()
@@ -78,14 +82,30 @@ public class CharacterOffsetToLineTokenC
for (Annotation current : annotationIndex)
{
BaseToken bt = (BaseToken)current;
- int begin = bt.getBegin();
- int end = bt.getEnd();
-
- tokenBeginEndTreeSet.add(begin);
- tokenBeginEndTreeSet.add(end);
+ // filter out NewlineToken
+ if (!(bt instanceof NewlineToken)) {
+ int begin = bt.getBegin();
+ int end = bt.getEnd();
+ tokenBeginEndTreeSet.add(begin);
+ tokenBeginEndTreeSet.add(end);
+ }
}
}
+ protected void buildSentenceToTokenNumberMap() {
+ sentenceToTokenNumberMap = new HashMap<Sentence, List<BaseToken>>();
+ for (Sentence s : beginTreeMap.values()) {
+ FSIterator<Annotation> tokensInSentenceIterator = jcas
+ .getAnnotationIndex(BaseToken.type).subiterator(s);
+ List<BaseToken> btList = new ArrayList<BaseToken>();
+ BaseToken bt = null;
+ while ((bt = this.getNextNonEOLToken(tokensInSentenceIterator)) != null) {
+ btList.add(bt);
+ }
+ sentenceToTokenNumberMap.put(s, btList);
+ }
+ }
+
public Sentence findPreviousOrCurrentSentence(int characterOffset)
{
Integer floorKey = beginTreeMap.floorKey(characterOffset);
@@ -171,36 +191,29 @@ public class CharacterOffsetToLineTokenC
int lineNumber = sentence.getSentenceNumber() + 1;
- FSIterator<Annotation> tokensInSentenceIterator =
- jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
-
- if (!tokensInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
- BaseToken firstToken = (BaseToken)firstTokenAnnotation;
- int firstTokenInSentenceNumber = firstToken.getTokenNumber();
-
-
- FSIterator<Annotation> beginTokenInSentenceIterator =
- constraintConstructorFindContainedBy.createFilteredIterator(
- characterOffset, characterOffset, baseTokenType);
-
- if (!beginTokenInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!! (character offset request = " + characterOffset);
- }
- Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken beginToken = (BaseToken)beginTokenAnnotation;
- int beginTokenNumber = beginToken.getTokenNumber();
- int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
-
- LineAndTokenPosition b = new LineAndTokenPosition();
- b.setLine(lineNumber);
- b.setTokenOffset(beginTokenWordNumber);
-
- return b;
+ FSIterator<Annotation> beginTokenInSentenceIterator = constraintConstructorFindContainedBy
+ .createFilteredIterator(characterOffset, characterOffset,
+ baseTokenType);
+ BaseToken beginToken = this
+ .getNextNonEOLToken(beginTokenInSentenceIterator);
+ int beginTokenWordNumber = this.sentenceToTokenNumberMap.get(sentence)
+ .indexOf(beginToken);
+ LineAndTokenPosition b = new LineAndTokenPosition();
+ b.setLine(lineNumber);
+ b.setTokenOffset(beginTokenWordNumber);
+
+ return b;
+ }
+
+ public BaseToken getNextNonEOLToken(
+ FSIterator<Annotation> tokensInSentenceIterator) {
+ while (tokensInSentenceIterator.hasNext()) {
+ BaseToken bt = (BaseToken) tokensInSentenceIterator.next();
+ if (!(bt instanceof NewlineToken)) {
+ return bt;
+ }
+ }
+ return null;
}
public List<LineAndTokenPosition> calculateBeginAndEndOfConcept
@@ -212,79 +225,82 @@ public class CharacterOffsetToLineTokenC
public List<LineAndTokenPosition> calculateBeginAndEndOfConcept(
int problemBegin, int problemEnd)
{
- //int externalId = problem.getExternalId();
- //int sentenceTypeId = Sentence.type;
- int baseTokenTypeId = BaseToken.type;
- //jcas.getAnnotationIndex(sentenceTypeId);
-
- ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(jcas);
- ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(jcas);
-
- //AnnotationIndex<Annotation> sentenceAnnotationIndex = jcas.getAnnotationIndex(sentenceTypeId);
- Type sentenceType = jcas.getTypeSystem().getType(Sentence.class.getName());
- Type baseTokenType = jcas.getTypeSystem().getType(BaseToken.class.getName());
- ///
- FSIterator<Annotation> filteredIterator =
- constraintConstructorFindContainedBy.createFilteredIterator(
- problemBegin, problemEnd, sentenceType);
- ///
- if (!filteredIterator.hasNext())
- {
- throw new RuntimeException("Surrounding sentence annotation not found!!");
- }
- Annotation sentenceAnnotation = filteredIterator.next();
- Sentence sentence = (Sentence)sentenceAnnotation;
- int lineNumber = sentence.getSentenceNumber() + 1;
-
-
- FSIterator<Annotation> tokensInSentenceIterator =
- jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
-
- if (!tokensInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
- BaseToken firstToken = (BaseToken)firstTokenAnnotation;
- int firstTokenInSentenceNumber = firstToken.getTokenNumber();
-
-
- FSIterator<Annotation> beginTokenInSentenceIterator =
- constraintConstructorFindContainedWithin.createFilteredIterator(
- problemBegin, problemEnd, baseTokenType);
-
- if (!beginTokenInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken beginToken = (BaseToken)beginTokenAnnotation;
- int beginTokenNumber = beginToken.getTokenNumber();
- int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
-
-
- beginTokenInSentenceIterator.moveToLast();
- if (!beginTokenInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation endTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken endToken = (BaseToken)endTokenAnnotation;
- int endTokenNumber = endToken.getTokenNumber();
- int endTokenWordNumber = endTokenNumber - firstTokenInSentenceNumber;
-
-
- ArrayList<LineAndTokenPosition> list = new ArrayList<LineAndTokenPosition>();
- LineAndTokenPosition b = new LineAndTokenPosition();
- b.setLine(lineNumber);
- b.setTokenOffset(beginTokenWordNumber);
- list.add(b);
- LineAndTokenPosition e = new LineAndTokenPosition();
- e.setLine(lineNumber);
- e.setTokenOffset(endTokenWordNumber);
- System.out.println("Adding lineTokenEnding " + lineNumber + " offset = " + endTokenWordNumber);
- list.add(e);
- return list;
+ // int externalId = problem.getExternalId();
+ // int sentenceTypeId = Sentence.type;
+ int baseTokenTypeId = BaseToken.type;
+ // jcas.getAnnotationIndex(sentenceTypeId);
+
+ ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(
+ jcas);
+ ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(
+ jcas);
+
+ // AnnotationIndex<Annotation> sentenceAnnotationIndex =
+ // jcas.getAnnotationIndex(sentenceTypeId);
+ Type sentenceType = jcas.getTypeSystem().getType(
+ Sentence.class.getName());
+ Type baseTokenType = jcas.getTypeSystem().getType(
+ BaseToken.class.getName());
+ // /
+ FSIterator<Annotation> filteredIterator = constraintConstructorFindContainedBy
+ .createFilteredIterator(problemBegin, problemEnd, sentenceType);
+ // /
+ if (!filteredIterator.hasNext()) {
+ throw new RuntimeException(
+ "Surrounding sentence annotation not found!!");
+ }
+ Annotation sentenceAnnotation = filteredIterator.next();
+ Sentence sentence = (Sentence) sentenceAnnotation;
+ int lineNumber = sentence.getSentenceNumber() + 1;
+
+ // FSIterator<Annotation> tokensInSentenceIterator = jcas
+ // .getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
+ //
+ // if (!tokensInSentenceIterator.hasNext()) {
+ // throw new RuntimeException("First token in sentence not found!!");
+ // }
+ // Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
+ // BaseToken firstToken = (BaseToken) firstTokenAnnotation;
+ // int firstTokenInSentenceNumber = firstToken.getTokenNumber();
+
+ FSIterator<Annotation> beginTokenInSentenceIterator = constraintConstructorFindContainedWithin
+ .createFilteredIterator(problemBegin, problemEnd, baseTokenType);
+
+ // if (!beginTokenInSentenceIterator.hasNext()) {
+ // throw new RuntimeException("First token in sentence not found!!");
+ // }
+ // Annotation beginTokenAnnotation =
+ // beginTokenInSentenceIterator.next();
+ // BaseToken beginToken = (BaseToken) beginTokenAnnotation;
+ // int beginTokenNumber = beginToken.getTokenNumber();
+ // int beginTokenWordNumber = beginTokenNumber
+ // - firstTokenInSentenceNumber;
+ BaseToken beginToken = this
+ .getNextNonEOLToken(beginTokenInSentenceIterator);
+ int beginTokenWordNumber = this.sentenceToTokenNumberMap.get(sentence)
+ .indexOf(beginToken);
+
+ beginTokenInSentenceIterator.moveToLast();
+ if (!beginTokenInSentenceIterator.hasNext()) {
+ throw new RuntimeException("First token in sentence not found!!");
+ }
+ Annotation endTokenAnnotation = beginTokenInSentenceIterator.next();
+ BaseToken endToken = (BaseToken) endTokenAnnotation;
+ // int endTokenNumber = endToken.getTokenNumber();
+ // int endTokenWordNumber = endTokenNumber - firstTokenInSentenceNumber;
+ int endTokenWordNumber = this.sentenceToTokenNumberMap.get(sentence)
+ .indexOf(endToken);
+
+ ArrayList<LineAndTokenPosition> list = new ArrayList<LineAndTokenPosition>();
+ LineAndTokenPosition b = new LineAndTokenPosition();
+ b.setLine(lineNumber);
+ b.setTokenOffset(beginTokenWordNumber);
+ list.add(b);
+ LineAndTokenPosition e = new LineAndTokenPosition();
+ e.setLine(lineNumber);
+ e.setTokenOffset(endTokenWordNumber);
+ list.add(e);
+ return list;
}
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/SingleDocumentProcessorCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/SingleDocumentProcessorCtakes.java?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/SingleDocumentProcessorCtakes.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/i2b2/api/SingleDocumentProcessorCtakes.java Mon Apr 28 02:44:39 2014
@@ -156,214 +156,31 @@ public class SingleDocumentProcessorCtak
return arrayOfLines;
}
+ /**
+ * delegate to converter to determine offset.
+ */
+ @Override
public LineAndTokenPosition convertCharacterOffsetToLineToken(int characterOffset)
{
- int baseTokenTypeId = BaseToken.type;
-
- ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(jcas);
- ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(jcas);
-
- Type sentenceType = jcas.getTypeSystem().getType(Sentence.class.getName());
- Type baseTokenType = jcas.getTypeSystem().getType(BaseToken.class.getName());
-
- FSIterator<Annotation> filteredIterator =
- constraintConstructorFindContainedBy.createFilteredIterator(
- characterOffset, characterOffset, sentenceType);
-
- if (!filteredIterator.hasNext())
- {
- throw new RuntimeException("Surrounding sentence annotation not found!!");
- }
- Annotation sentenceAnnotation = filteredIterator.next();
- Sentence sentence = (Sentence)sentenceAnnotation;
- int lineNumber = sentence.getSentenceNumber() + 1;
-
-
- FSIterator<Annotation> tokensInSentenceIterator =
- jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
-
- if (!tokensInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
- BaseToken firstToken = (BaseToken)firstTokenAnnotation;
- int firstTokenInSentenceNumber = firstToken.getTokenNumber();
-
-
- FSIterator<Annotation> beginTokenInSentenceIterator =
- constraintConstructorFindContainedBy.createFilteredIterator(
- characterOffset, characterOffset, baseTokenType);
-
- if (!beginTokenInSentenceIterator.hasNext())
- {
- throw new RuntimeException("First token in sentence not found!!");
- }
- Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken beginToken = (BaseToken)beginTokenAnnotation;
- int beginTokenNumber = beginToken.getTokenNumber();
- int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
-
- LineAndTokenPosition b = new LineAndTokenPosition();
- b.setLine(lineNumber);
- b.setTokenOffset(beginTokenWordNumber);
-
- return b;
+ return converter2.convert(characterOffset);
}
+ @Override
public List<LineAndTokenPosition> calculateBeginAndEndOfConcept
(ApiConcept problem)
{
return calculateBeginAndEndOfConcept(problem.getBegin(), problem.getEnd());
}
+ /**
+ * delegate to converter to determine offset.
+ */
public List<LineAndTokenPosition> calculateBeginAndEndOfConcept(
int problemBegin, int problemEnd)
{
- //int externalId = problem.getExternalId();
- //int sentenceTypeId = Sentence.type;
- int baseTokenTypeId = BaseToken.type;
- //jcas.getAnnotationIndex(sentenceTypeId);
-
- ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(jcas);
- ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(jcas);
-
- //AnnotationIndex<Annotation> sentenceAnnotationIndex = jcas.getAnnotationIndex(sentenceTypeId);
- Type sentenceType = jcas.getTypeSystem().getType(Sentence.class.getName());
- Type baseTokenType = jcas.getTypeSystem().getType(BaseToken.class.getName());
- ///
- FSIterator<Annotation> filteredIterator =
- constraintConstructorFindContainedBy.createFilteredIterator(
- problemBegin, problemEnd, sentenceType);
- ///
- if (!filteredIterator.hasNext())
- {
- return null;
- //throw new RuntimeException("Surrounding sentence annotation not found!!");
- }
- Annotation sentenceAnnotation = filteredIterator.next();
- Sentence sentence = (Sentence)sentenceAnnotation;
- int lineNumber = sentence.getSentenceNumber() + 1;
-
-
- FSIterator<Annotation> tokensInSentenceIterator =
- jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
-
- if (!tokensInSentenceIterator.hasNext())
- {
- String message = "First token in sentence not found [#1]!!";
- Exception e = new RuntimeException(message);
- logger.error(message,e);
- return null;
- }
- Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
- BaseToken firstToken = (BaseToken)firstTokenAnnotation;
- int firstTokenInSentenceNumber = firstToken.getTokenNumber();
-
-
- FSIterator<Annotation> beginTokenInSentenceIterator =
- constraintConstructorFindContainedWithin.createFilteredIterator(
- problemBegin, problemEnd, baseTokenType);
-
- if (!beginTokenInSentenceIterator.hasNext())
- {
- String message = "First token in sentence not found [#2]!!";
- Exception e = new RuntimeException(message);
- logger.error(message, e);
- return null;
- }
- Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken beginToken = (BaseToken)beginTokenAnnotation;
- int beginTokenNumber = beginToken.getTokenNumber();
- int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
-
-
- beginTokenInSentenceIterator.moveToLast();
- if (!beginTokenInSentenceIterator.hasNext())
- {
- String message = "First token in sentence not found [#3]!!";
- Exception e = new RuntimeException(message);
- logger.error(message, e);
- return null;
- }
- Annotation endTokenAnnotation = beginTokenInSentenceIterator.next();
- BaseToken endToken = (BaseToken)endTokenAnnotation;
- int endTokenNumber = endToken.getTokenNumber();
- int endTokenWordNumber = endTokenNumber - firstTokenInSentenceNumber;
-
-
- ArrayList<LineAndTokenPosition> list = new ArrayList<LineAndTokenPosition>();
- LineAndTokenPosition b = new LineAndTokenPosition();
- b.setLine(lineNumber);
- b.setTokenOffset(beginTokenWordNumber);
- list.add(b);
- LineAndTokenPosition e = new LineAndTokenPosition();
- e.setLine(lineNumber);
- e.setTokenOffset(endTokenWordNumber);
- list.add(e);
- return list;
+ return ((CharacterOffsetToLineTokenConverterCtakesImpl) this.converter2)
+ .calculateBeginAndEndOfConcept(problemBegin, problemEnd);
}
-// /**
-// * @param problemBegin
-// * @param problemEnd
-// * @param sentenceType
-// * @return
-// */
-// public FSIterator<Annotation> createFilteredIteratorByBeginEndAndType(
-// int problemBegin, int problemEnd, Type sentenceType)
-// {
-// ConstraintFactory cf = jcas.getConstraintFactory();
-// TypeSystem ts = jcas.getTypeSystem();
-// Type annotationType = ts.getType(Annotation.class.getName());
-// Feature sentenceBeginFeature = annotationType.getFeatureByBaseName("begin");
-// FeaturePath sentenceBeginFeaturePath = jcas.createFeaturePath();
-// sentenceBeginFeaturePath.addFeature(sentenceBeginFeature);
-//
-// Feature sentenceEndFeature = annotationType.getFeatureByBaseName("end");
-// FeaturePath sentenceEndFeaturePath = jcas.createFeaturePath();
-// sentenceEndFeaturePath.addFeature(sentenceEndFeature);
-//
-// FSMatchConstraint beginAndEnd = constructContainedByConstraint(
-// problemBegin, problemEnd, cf, sentenceBeginFeaturePath,
-// sentenceEndFeaturePath);
-//
-//
-// FSTypeConstraint sentenceTypeConstraint = cf.createTypeConstraint();
-// sentenceTypeConstraint.add(sentenceType);
-//
-// FSMatchConstraint beginAndEndAndType = cf.and(beginAndEnd, sentenceTypeConstraint);
-//
-// FSIterator<Annotation> filteredIterator =
-// jcas.createFilteredIterator(jcas.getAnnotationIndex().iterator(), beginAndEndAndType);
-// return filteredIterator;
-// }
-//
-// /**
-// * @param problemBegin
-// * @param problemEnd
-// * @param cf
-// * @param sentenceBeginFeaturePath
-// * @param sentenceEndFeaturePath
-// * @return
-// */
-// public FSMatchConstraint constructContainedByConstraint(int problemBegin,
-// int problemEnd, ConstraintFactory cf,
-// FeaturePath sentenceBeginFeaturePath, FeaturePath sentenceEndFeaturePath)
-// {
-// FSIntConstraint sentenceBeginIntConstraint = cf.createIntConstraint();
-// sentenceBeginIntConstraint.leq(problemBegin);
-//
-// FSIntConstraint sentenceEndIntConstraint = cf.createIntConstraint();
-// sentenceEndIntConstraint.geq(problemEnd);
-//
-//
-// FSMatchConstraint begin = cf.embedConstraint(sentenceBeginFeaturePath, sentenceBeginIntConstraint);
-// FSMatchConstraint end = cf.embedConstraint(sentenceEndFeaturePath, sentenceEndIntConstraint);
-//
-// FSMatchConstraint beginAndEnd = cf.and(begin, end);
-// return beginAndEnd;
-// }
-
}
Propchange: ctakes/trunk/ctakes-clinical-pipeline/
------------------------------------------------------------------------------
Merged /ctakes/branches/ytex/ctakes-clinical-pipeline:r1551246-1590545
Modified: ctakes/trunk/ctakes-clinical-pipeline/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/pom.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/pom.xml Mon Apr 28 02:44:39 2014
@@ -81,6 +81,18 @@
<groupId>org.apache.ctakes</groupId>
<artifactId>ctakes-dependency-parser-res</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex-res</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex-uima</artifactId>
+ </dependency>
</dependencies>
<profiles>
<profile>
Modified: ctakes/trunk/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/pom.xml (original)
+++ ctakes/trunk/ctakes-core/pom.xml Mon Apr 28 02:44:39 2014
@@ -45,6 +45,11 @@
<groupId>org.uimafit</groupId>
<artifactId>uimafit</artifactId>
</dependency>
+ <!-- we excluded spring-context uimafit transitive dependency in the parent pom, include it here -->
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-context</artifactId>
+ </dependency>
<dependency>
<!-- needed for uimafit, but excluded in parent pom -->
<groupId>commons-io</groupId>
@@ -63,10 +68,6 @@
<artifactId>cleartk-util</artifactId>
</dependency>
<dependency>
- <groupId>org.uimafit</groupId>
- <artifactId>uimafit</artifactId>
- </dependency>
- <dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
Modified: ctakes/trunk/ctakes-dependency-parser/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/pom.xml (original)
+++ ctakes/trunk/ctakes-dependency-parser/pom.xml Mon Apr 28 02:44:39 2014
@@ -73,6 +73,11 @@
<groupId>org.uimafit</groupId>
<artifactId>uimafit</artifactId>
</dependency>
+ <!-- we excluded spring-context uimafit transitive dependency in the parent pom, include it here -->
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-context</artifactId>
+ </dependency>
<dependency>
<groupId>com.carrotsearch</groupId>
<artifactId>hppc</artifactId>
Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java (original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPSemanticRoleLabelerAE.java Mon Apr 28 02:44:39 2014
@@ -30,6 +30,7 @@ import org.apache.ctakes.core.resource.F
import org.apache.ctakes.core.util.ListFactory;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.textsem.Predicate;
import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
@@ -169,30 +170,36 @@ final String language = AbstractReader.L
int[] headIDs = new int[tokens.size()];
String[] deprels = new String[tokens.size()];
- // Initialize Token / Sentence info for the ClearNLP Semantic Role Labeler
+ // Initialize Token / Sentence info for the ClearNLP Semantic Role Labeler
+ // we are filtering out newline tokens
+ // use idIter as the non-newline token index counter
+ int idIter = 0;
for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
-
- // Determine HeadId
- List<ConllDependencyNode> casDepNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, token);
-
- ConllDependencyNode casDepNode = casDepNodes.get(0);
- if(casDepNode.getId() == 0) casDepNode = casDepNodes.get(1);
-
- deprels[i] = casDepNode.getDeprel();
- ConllDependencyNode head = casDepNode.getHead();
-
- // If there is no head, this is the head node, set node to 0
- headIDs[i] = (head == null) ? 0 : depNodeToID.get(head);
-
- // Populate Dependency Node / Tree information
- int id = i + 1;
- String form = casDepNode.getForm();
- String pos = casDepNode.getPostag();
- String lemma = casDepNode.getLemma();
-
- DEPNode node = new DEPNode(id, form, lemma, pos, new DEPFeat());
- tree.add(node);
+ BaseToken token = tokens.get(i);
+ // ignore newline tokens within a sentence - newline = whitespace = non-token
+ if(!(token instanceof NewlineToken)) {
+ // Determine HeadId
+ List<ConllDependencyNode> casDepNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, token);
+
+ ConllDependencyNode casDepNode = casDepNodes.get(0);
+ if(casDepNode.getId() == 0) casDepNode = casDepNodes.get(1);
+
+ deprels[i] = casDepNode.getDeprel();
+ ConllDependencyNode head = casDepNode.getHead();
+
+ // If there is no head, this is the head node, set node to 0
+ headIDs[i] = (head == null) ? 0 : depNodeToID.get(head);
+
+ // Populate Dependency Node / Tree information
+ int id = idIter + 1;
+ String form = casDepNode.getForm();
+ String pos = casDepNode.getPostag();
+ String lemma = casDepNode.getLemma();
+
+ DEPNode node = new DEPNode(id, form, lemma, pos, new DEPFeat());
+ tree.add(node);
+ idIter++;
+ }
}
for (int i=1; i<tree.size(); i++)
Modified: ctakes/trunk/ctakes-distribution/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-distribution/pom.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-distribution/pom.xml (original)
+++ ctakes/trunk/ctakes-distribution/pom.xml Mon Apr 28 02:44:39 2014
@@ -112,6 +112,20 @@
<groupId>org.apache.ctakes</groupId>
<artifactId>ctakes-smoking-status</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex-uima</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-ytex-web</artifactId>
+ <type>jar</type>
+ <version>${project.version}</version>
+ </dependency>
</dependencies>
<build>
Modified: ctakes/trunk/ctakes-distribution/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-distribution/src/main/assembly/bin.xml?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-distribution/src/main/assembly/bin.xml (original)
+++ ctakes/trunk/ctakes-distribution/src/main/assembly/bin.xml Mon Apr 28 02:44:39 2014
@@ -56,10 +56,18 @@
<!-- <include>org.apache.ctakes:ctakes-temporal</include> -->
<include>org.apache.ctakes:ctakes-type-system</include>
<include>org.apache.ctakes:ctakes-utils</include>
+ <include>org.apache.ctakes:ctakes-ytex</include>
+ <include>org.apache.ctakes:ctakes-ytex-uima</include>
</includes>
<excludes>
<exclude>net.sourceforge.ctakesresources:*</exclude>
+ <!-- exclude non-asf compliant dependencies used by ytex -->
+ <exclude>org.hibernate:*</exclude>
+ <exclude>nz.ac.waikato.cms.weka:*</exclude>
+ <exclude>mysql:*</exclude>
+ <exclude>com.microsoft.sqlserver:*</exclude>
+ <exclude>oracle.jdbc:*</exclude>
</excludes>
<unpack>false</unpack>
<useProjectArtifact>false</useProjectArtifact>
@@ -297,7 +305,7 @@
<outputDirectory>resources</outputDirectory>
<fileMode>644</fileMode>
<directoryMode>755</directoryMode>
- </fileSet>
+ </fileSet>
<fileSet>
<directory>../ctakes-pos-tagger/src/main/resources</directory>
<outputDirectory>resources</outputDirectory>
@@ -367,5 +375,29 @@
<fileMode>644</fileMode>
<directoryMode>755</directoryMode>
</fileSet>
+ <fileSet>
+ <directory>../ctakes-ytex-web/src/main/webapp</directory>
+ <outputDirectory>desc/ctakes-ytex-web</outputDirectory>
+ <fileMode>644</fileMode>
+ <directoryMode>755</directoryMode>
+ </fileSet>
+ <fileSet>
+ <directory>../ctakes-ytex-res/src/main/resources</directory>
+ <outputDirectory>resources</outputDirectory>
+ <fileMode>644</fileMode>
+ <directoryMode>755</directoryMode>
+ </fileSet>
+ <fileSet>
+ <directory>../ctakes-ytex-uima/src/main/resources</directory>
+ <outputDirectory>resources</outputDirectory>
+ <fileMode>644</fileMode>
+ <directoryMode>755</directoryMode>
+ </fileSet>
+ <fileSet>
+ <directory>../ctakes-ytex/scripts</directory>
+ <outputDirectory>bin/ctakes-ytex/scripts</outputDirectory>
+ <fileMode>644</fileMode>
+ <directoryMode>755</directoryMode>
+ </fileSet>
</fileSets>
</assembly>
\ No newline at end of file
Modified: ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCPE.bat
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCPE.bat?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCPE.bat (original)
+++ ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCPE.bat Mon Apr 28 02:44:39 2014
@@ -39,8 +39,9 @@ echo This environment variable is needed
goto end
:okHome
+@set PATH=%PATH%;%CTAKES_HOME%\lib\auth\x64
cd %CTAKES_HOME%
-java -cp "%CTAKES_HOME%/lib/*;%CTAKES_HOME%/desc/;%CTAKES_HOME%/resources/" -Dlog4j.configuration=file:/%CTAKES_HOME%/config/log4j.xml -Xms512M -Xmx1024M org.apache.uima.tools.cpm.CpmFrame
+java -cp "%CTAKES_HOME%/lib/*;%CTAKES_HOME%/desc/;%CTAKES_HOME%/resources/" -Dlog4j.configuration=file:/%CTAKES_HOME%/config/log4j.xml -Xms512M -Xmx2g org.apache.uima.tools.cpm.CpmFrame
:end
ENDLOCAL
Modified: ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCVD.bat
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCVD.bat?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCVD.bat (original)
+++ ctakes/trunk/ctakes-distribution/src/main/bin/runctakesCVD.bat Mon Apr 28 02:44:39 2014
@@ -38,7 +38,8 @@ echo This environment variable is needed
goto end
:okHome
+@set PATH=%PATH%;%CTAKES_HOME%\lib\auth\x64
cd %CTAKES_HOME%
-java -cp "%CTAKES_HOME%/lib/*;%CTAKES_HOME%/desc/;%CTAKES_HOME%/resources/" -Dlog4j.configuration=file:/%CTAKES_HOME%/config/log4j.xml -Xms512M -Xmx1024M org.apache.uima.tools.cvd.CVD
+java -cp "%CTAKES_HOME%/lib/*;%CTAKES_HOME%/desc/;%CTAKES_HOME%/resources/" -Dlog4j.configuration=file:/%CTAKES_HOME%/config/log4j.xml -Xms512M -Xmx2g org.apache.uima.tools.cvd.CVD
:end
\ No newline at end of file
Modified: ctakes/trunk/ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java?rev=1590550&r1=1590549&r2=1590550&view=diff
==============================================================================
--- ctakes/trunk/ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java (original)
+++ ctakes/trunk/ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java Mon Apr 28 02:44:39 2014
@@ -184,14 +184,10 @@ public class DrugMentionAnnotator extend
throws ResourceInitializationException
{
super.initialize(annotCtx);
- try
- {
- iv_medicationRelatedSections = ParamUtil.getStringParameterValuesSet(
+
+ iv_medicationRelatedSections = ParamUtil.getStringParameterValuesSet(
PARAM_SEGMENTS_MEDICATION_RELATED, annotCtx);
- } catch (ResourceAccessException e)
- {
- throw new ResourceInitializationException(e);
- }
+
iv_fractionFSM = new FractionStrengthFSM();
iv_suffixFSM = new SuffixStrengthFSM();