You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2014/10/15 21:29:44 UTC
svn commit: r1632167 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/
Author: clin
Date: Wed Oct 15 19:29:44 2014
New Revision: 1632167
URL: http://svn.apache.org/r1632167
Log:
Added more ClearTK 2.0-compliant feature extractors for within-sentence event-event and event-time relations
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ConjunctionRelationFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoordinateFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependingVerbsFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DeterminerRelationFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionRelationFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventsInTheSameSentenceExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/OverlappedHeadFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SpecialAnnotationRelationExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETFlatExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXPropertyRelationFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXRelationFeaturesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TokenPropertyFeaturesExtractor.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventArgumentPropertyExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseRelationExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventTimeBetweenCandidatesExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderRelationExtractor.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ConjunctionRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ConjunctionRelationFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ConjunctionRelationFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ConjunctionRelationFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,61 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+/**
+ * Relation feature extractor that inspects the text gap between two arguments.
+ * <p>
+ * If the gap contains no annotation whose class is exactly {@link EventMention},
+ * every token in the gap whose part-of-speech tag starts with "CC" or "IN", or
+ * equals ",", contributes two features: a constant marker and the tag itself.
+ */
+public class ConjunctionRelationFeaturesExtractor implements
+RelationFeaturesExtractor {
+
+  /** Name shared by every feature this extractor emits. */
+  private static final String FEATURE_NAME = "ConjunctionFeature";
+
+  /**
+   * Extracts conjunction features from the span between the two arguments.
+   *
+   * @param jCas the CAS holding the token and event annotations
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return the extracted features; empty when the arguments touch or overlap,
+   *         when an event lies between them, or when no matching token is found
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = new ArrayList<>();
+
+    // Start by assuming arg1 precedes arg2; swap the gap boundaries otherwise.
+    int begin = arg1.getEnd();
+    int end = arg2.getBegin();
+    if (begin > end) {
+      begin = arg2.getEnd();
+      end = arg1.getBegin();
+    }
+
+    // Adjacent or overlapping arguments leave no gap to inspect.
+    if (begin >= end) {
+      return feats;
+    }
+
+    // Subclasses of EventMention are ignored; only exact EventMention instances
+    // count as intervening events, and any such event suppresses all features.
+    for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, begin, end)) {
+      if (event.getClass().equals(EventMention.class)) {
+        return feats;
+      }
+    }
+
+    for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, begin, end)) {
+      String pos = token.getPartOfSpeech();
+      if (pos == null) {
+        // Untagged token: nothing to match against, and must not cause an NPE.
+        continue;
+      }
+      if (pos.startsWith("CC") || pos.equals(",") || pos.startsWith("IN")) {
+        feats.add(new Feature(FEATURE_NAME, "Contain_Conjunction_inBetween"));
+        feats.add(new Feature(FEATURE_NAME, pos));
+      }
+    }
+
+    return feats;
+  }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoordinateFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoordinateFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoordinateFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoordinateFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,68 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+/**
+ * Relation feature extractor that emits a single "Coordinate" feature when the
+ * two arguments have token-by-token identical part-of-speech sequences, no
+ * {@link EventMention} lies between them, and at least one token between them
+ * is a coordinator (see {@link #isCoordinator(BaseToken)}).
+ */
+public class CoordinateFeaturesExtractor implements
+RelationFeaturesExtractor {
+
+  /**
+   * Extracts the coordination feature for an argument pair.
+   *
+   * @param jCas the CAS holding the token and event annotations
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return a list with one "Coordinate_feature" entry, or an empty list when
+   *         any of the conditions described on the class is not met
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = new ArrayList<>();
+
+    List<BaseToken> arg1Tokens = JCasUtil.selectCovered(jCas, BaseToken.class, arg1);
+    List<BaseToken> arg2Tokens = JCasUtil.selectCovered(jCas, BaseToken.class, arg2);
+
+    // Both arguments must be non-empty and have the same number of tokens.
+    if (arg1Tokens.isEmpty() || arg1Tokens.size() != arg2Tokens.size()) {
+      return feats;
+    }
+
+    // The POS sequences must match position by position. A missing tag cannot
+    // confirm a match, so it is treated as a mismatch instead of raising an NPE.
+    for (int i = 0; i < arg1Tokens.size(); i++) {
+      String pos1 = arg1Tokens.get(i).getPartOfSpeech();
+      String pos2 = arg2Tokens.get(i).getPartOfSpeech();
+      if (pos1 == null || !pos1.equals(pos2)) {
+        return feats;
+      }
+    }
+
+    // Start by assuming arg1 precedes arg2; swap the gap boundaries otherwise.
+    int begin = arg1.getEnd();
+    int end = arg2.getBegin();
+    if (begin > end) {
+      begin = arg2.getEnd();
+      end = arg1.getBegin();
+    }
+
+    // Adjacent or overlapping arguments leave no gap to inspect.
+    if (begin >= end) {
+      return feats;
+    }
+
+    // Any event mention (of any subclass) in the gap suppresses the feature.
+    if (!JCasUtil.selectCovered(jCas, EventMention.class, begin, end).isEmpty()) {
+      return feats;
+    }
+
+    for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, begin, end)) {
+      if (isCoordinator(token)) {
+        feats.add(new Feature("Coordinate_feature", "Coordinate"));
+        break; // one feature per pair is enough
+      }
+    }
+
+    return feats;
+  }
+
+  /**
+   * Tells whether a token is treated as a coordinator: its POS tag starts with
+   * "CC", or its text is one of {@code , ; / w &} or (ignoring case)
+   * "versus", "vs", "with".
+   */
+  private static boolean isCoordinator(BaseToken token) {
+    String pos = token.getPartOfSpeech();
+    if (pos != null && pos.startsWith("CC")) {
+      return true;
+    }
+    String text = token.getCoveredText();
+    // NOTE(review): "w" is presumably shorthand for "with" - confirm.
+    return text.equals(",") || text.equals(";") || text.equals("/")
+        || text.equals("w") || text.equals("&")
+        || text.equalsIgnoreCase("versus") || text.equalsIgnoreCase("vs")
+        || text.equalsIgnoreCase("with");
+  }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependingVerbsFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependingVerbsFeatureExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependingVerbsFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependingVerbsFeatureExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,97 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+/**
+ * Relation feature extractor built on the dependency parse.
+ * <p>
+ * It first looks for a pair of "NN*"-tagged dependency nodes, one inside each
+ * argument, whose common ancestor is tagged "VB*". If such a pair exists, two
+ * features describing that shared verb are emitted. Otherwise each argument is
+ * handled on its own: the nearest "VB*" ancestor of one of its "NN*" nodes
+ * yields features about that verb and the node's dependency relation.
+ */
+public class DependingVerbsFeatureExtractor implements RelationFeaturesExtractor {
+
+  private static final String FEAT_NAME = "Depending_Verb";
+
+  /**
+   * Extracts shared-verb features, or per-argument verb features when the two
+   * arguments are not governed by a common verb.
+   *
+   * @param jcas the CAS holding the dependency nodes
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return the extracted features, possibly empty
+   */
+  @Override
+  public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> features = new ArrayList<>(getCommonVerbFeatures(jcas, arg1, arg2));
+
+    // No shared governing verb: fall back to each argument's own verb.
+    if (features.isEmpty()) {
+      features.addAll(getDependingVerbFeature(jcas, arg1));
+      features.addAll(getDependingVerbFeature(jcas, arg2));
+    }
+
+    return features;
+  }
+
+  /**
+   * Returns the two shared-verb features for the first noun pair (in covered
+   * order) whose common ancestor is a verb, or an empty list if there is none.
+   */
+  private static List<Feature> getCommonVerbFeatures(JCas jcas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) {
+    List<Feature> feats = new ArrayList<>();
+    // Looked up once; it does not depend on the outer loop variable.
+    List<ConllDependencyNode> secondNodes = JCasUtil.selectCovered(jcas, ConllDependencyNode.class, arg2);
+
+    for (ConllDependencyNode firstNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, arg1)) {
+      if (!hasPosPrefix(firstNode, "NN")) {
+        continue;
+      }
+      for (ConllDependencyNode nextNode : secondNodes) {
+        if (!hasPosPrefix(nextNode, "NN")) {
+          continue;
+        }
+        ConllDependencyNode ancestor = DependencyParseUtils.getCommonAncestor(firstNode, nextNode);
+        // hasPosPrefix also guards against an ancestor without a POS tag.
+        if (ancestor != null && hasPosPrefix(ancestor, "VB")) {
+          feats.add(new Feature(FEAT_NAME, "both_events_governed_by_the_same_verb"));
+          // Locale.ROOT keeps the feature string stable regardless of the JVM default locale.
+          feats.add(new Feature(FEAT_NAME, ancestor.getDeprel() + "_"
+              + ancestor.getCoveredText().toLowerCase(java.util.Locale.ROOT)));
+          return feats;
+        }
+      }
+    }
+    return feats;
+  }
+
+  /**
+   * Returns features for the first "NN*" node inside {@code arg} that has a
+   * "VB*" ancestor: the verb's POS tag and, if the node's dependency relation
+   * starts with "nsubj" or "dobj", a subject/object marker. Feature values are
+   * prefixed with the event's type index id, or -1 when it is not available.
+   */
+  private static List<Feature> getDependingVerbFeature(JCas jcas, IdentifiedAnnotation arg) {
+    List<Feature> feats = new ArrayList<>();
+
+    for (ConllDependencyNode dnode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, arg)) {
+      if (!hasPosPrefix(dnode, "NN")) {
+        continue;
+      }
+      ConllDependencyNode verbNode = getVerbAncestor(dnode);
+      if (verbNode == null) {
+        continue;
+      }
+
+      int typeId = -1;
+      if (arg instanceof EventMention) {
+        EventMention mention = (EventMention) arg;
+        if (mention.getEvent() != null && mention.getEvent().getProperties() != null) {
+          typeId = mention.getEvent().getProperties().getTypeIndexID();
+        }
+      }
+      feats.add(new Feature(FEAT_NAME, typeId + "_" + verbNode.getPostag()));
+
+      String depRelation = dnode.getDeprel();
+      if (depRelation != null) {
+        if (depRelation.startsWith("nsubj")) {
+          feats.add(new Feature(FEAT_NAME, typeId + "_isASubject"));
+        } else if (depRelation.startsWith("dobj")) {
+          feats.add(new Feature(FEAT_NAME, typeId + "_isAnObject"));
+        }
+      }
+
+      break; // only the first qualifying node is used
+    }
+    return feats;
+  }
+
+  /**
+   * Walks up the head chain from {@code dnode} and returns the first ancestor
+   * whose POS tag starts with "VB", or {@code null} if the chain ends first.
+   */
+  private static ConllDependencyNode getVerbAncestor(ConllDependencyNode dnode) {
+    ConllDependencyNode currNode = dnode == null ? null : dnode.getHead();
+    while (currNode != null) {
+      if (hasPosPrefix(currNode, "VB")) {
+        return currNode;
+      }
+      currNode = currNode.getHead();
+    }
+    return null;
+  }
+
+  /** Null-safe test that a node's POS tag starts with the given prefix. */
+  private static boolean hasPosPrefix(ConllDependencyNode node, String prefix) {
+    String pos = node.getPostag();
+    return pos != null && pos.startsWith(prefix);
+  }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DeterminerRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DeterminerRelationFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DeterminerRelationFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DeterminerRelationFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * Relation feature extractor that reports, for each of the two arguments, the
+ * part-of-speech tag of the first covered {@link WordToken} whose tag starts
+ * with "DT" or "PRP$".
+ * @author CH151862
+ *
+ */
+public class DeterminerRelationFeaturesExtractor implements RelationFeaturesExtractor {
+
+  /** Name shared by every feature this extractor emits. */
+  private static final String FEATURE_NAME = "Determiner";
+
+  /**
+   * Extracts at most one determiner feature per argument.
+   *
+   * @param jCas the CAS holding the word tokens
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return zero, one or two features whose value is the matching POS tag
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> features = Lists.newArrayList();
+
+    List<IdentifiedAnnotation> arguments = Lists.newArrayList();
+    arguments.add(arg1);
+    arguments.add(arg2);
+
+    for (IdentifiedAnnotation arg : arguments) {
+      for (WordToken word : JCasUtil.selectCovered(jCas, WordToken.class, arg)) {
+        String pos = word.getPartOfSpeech();
+        // An untagged token can neither match nor be allowed to raise an NPE.
+        if (pos != null && (pos.startsWith("DT") || pos.startsWith("PRP$"))) {
+          features.add(new Feature(FEATURE_NAME, pos));
+          break; // only the first matching token of each argument is reported
+        }
+      }
+    }
+
+    return features;
+  }
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventArgumentPropertyExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventArgumentPropertyExtractor.java?rev=1632167&r1=1632166&r2=1632167&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventArgumentPropertyExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventArgumentPropertyExtractor.java Wed Oct 15 19:29:44 2014
@@ -20,7 +20,9 @@ package org.apache.ctakes.temporal.ae.fe
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
@@ -32,52 +34,78 @@ import org.cleartk.ml.Feature;
import org.apache.uima.fit.util.JCasUtil;
public class EventArgumentPropertyExtractor implements
- RelationFeaturesExtractor {
+RelationFeaturesExtractor {
- @Override
- public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
- List<Feature> feats = new ArrayList<Feature>();
-
- Sentence coveringSent = JCasUtil.selectCovering(jCas, Sentence.class, arg1.getBegin(), arg1.getEnd()).get(0);
- List<EventMention> events = JCasUtil.selectCovered(EventMention.class, coveringSent);
- List<EventMention> realEvents = new ArrayList<EventMention>();
- for(EventMention event : events){
- // filter out ctakes events
- if(event.getClass().equals(EventMention.class)){
- realEvents.add(event);
- }
- }
- events = realEvents;
- EventMention anchor = events.get(0);
-
- if(arg1.getBegin() == anchor.getBegin() && arg1.getEnd() == anchor.getEnd()){
- feats.add(new Feature("Arg1LeftmostEvent"));
- }else if(arg2.getBegin() == anchor.getBegin() && arg2.getEnd() == anchor.getEnd()){
- feats.add(new Feature("Arg2LeftmostEvent"));
- }
-
- if(arg1 instanceof EventMention){
- feats.addAll(getEventFeats("mention1property", (EventMention)arg1));
- }
- if(arg2 instanceof EventMention){
- feats.addAll(getEventFeats("mention2property", (EventMention)arg2));
- }
- return feats;
- }
-
- private static Collection<? extends Feature> getEventFeats(String name, EventMention mention) {
- List<Feature> feats = new ArrayList<Feature>();
-
- feats.add(new Feature(name + "_modality", mention.getEvent().getProperties().getContextualModality()));
- feats.add(new Feature(name + "_aspect", mention.getEvent().getProperties().getContextualAspect()));
- feats.add(new Feature(name + "_permanence", mention.getEvent().getProperties().getPermanence()));
- feats.add(new Feature(name + "_polarity", mention.getEvent().getProperties().getPolarity()));
- feats.add(new Feature(name + "_category", mention.getEvent().getProperties().getCategory()));
- feats.add(new Feature(name + "_degree", mention.getEvent().getProperties().getDegree()));
- feats.add(new Feature(name + "_doctimerel", mention.getEvent().getProperties().getDocTimeRel()));
-
- return feats;
- }
+ /**
+  * Emits a marker feature when one of the arguments is the first exact
+  * {@link EventMention} of a sentence covering the reference argument, followed
+  * by the property features of every argument that is an event mention.
+  * The reference argument is arg1 if it is an event mention, otherwise arg2;
+  * if neither is an event mention, no features are produced.
+  */
+ @Override
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+   List<Feature> feats = new ArrayList<>();
+
+   // Choose the argument whose span is used to look up covering sentences.
+   IdentifiedAnnotation reference;
+   if (arg1 instanceof EventMention) {
+     reference = arg1;
+   } else if (arg2 instanceof EventMention) {
+     reference = arg2;
+   } else {
+     return feats;
+   }
+
+   Set<Sentence> coveringSents = new HashSet<>();
+   coveringSents.addAll(
+       JCasUtil.selectCovering(jCas, Sentence.class, reference.getBegin(), reference.getEnd()));
+
+   for (Sentence sentence : coveringSents) {
+     // Find the first mention whose class is exactly EventMention (subclasses are skipped).
+     EventMention first = null;
+     for (EventMention candidate : JCasUtil.selectCovered(EventMention.class, sentence)) {
+       if (candidate.getClass().equals(EventMention.class)) {
+         first = candidate;
+         break;
+       }
+     }
+     if (first == null) {
+       continue;
+     }
+     if (arg1 == first) {
+       feats.add(new Feature("Arg1LeftmostEvent"));
+     } else if (arg2 == first) {
+       feats.add(new Feature("Arg2LeftmostEvent"));
+     }
+   }
+
+   if (arg1 instanceof EventMention) {
+     feats.addAll(getEventFeats("mention1property", (EventMention) arg1));
+   }
+   if (arg2 instanceof EventMention) {
+     feats.addAll(getEventFeats("mention2property", (EventMention) arg2));
+   }
+
+   return feats;
+ }
+
+ /**
+  * Builds the property features of one event mention. Each feature name is
+  * {@code name} plus a suffix (_modality, _polarity, _doctimerel, _typeId);
+  * a feature is emitted only when the corresponding value is non-null.
+  * Aspect, permanence, category and degree are deliberately not emitted
+  * (the previous revision marked them as null).
+  *
+  * @param name prefix for the generated feature names
+  * @param mention the event mention to describe
+  * @return the property features; empty when the mention has no event or no properties
+  */
+ private static Collection<? extends Feature> getEventFeats(String name, EventMention mention) {
+   List<Feature> propertyFeats = new ArrayList<>();
+
+   if (mention.getEvent() != null && mention.getEvent().getProperties() != null) {
+     String modality = mention.getEvent().getProperties().getContextualModality();
+     if (modality != null) {
+       propertyFeats.add(new Feature(name + "_modality", modality));
+     }
+
+     Integer polarityValue = mention.getEvent().getProperties().getPolarity();
+     if (polarityValue != null) {
+       propertyFeats.add(new Feature(name + "_polarity", polarityValue));
+     }
+
+     String relToDocTime = mention.getEvent().getProperties().getDocTimeRel();
+     if (relToDocTime != null) {
+       propertyFeats.add(new Feature(name + "_doctimerel", relToDocTime));
+     }
+
+     Integer typeIndexId = mention.getEvent().getProperties().getTypeIndexID();
+     if (typeIndexId != null) {
+       propertyFeats.add(new Feature(name + "_typeId", typeIndexId));
+     }
+   }
+
+   return propertyFeats;
+ }
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionRelationFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionRelationFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionRelationFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * Relation feature extractor describing where each event argument sits in the
+ * document: within the span of the first/last five sentences, within the span
+ * of the first/last five exact {@link EventMention}s of the document, and
+ * within the span of the first/last three exact event mentions of each
+ * covering {@link Segment} other than "SIMPLE_SEGMENT".
+ * @author CH151862
+ *
+ */
+public class EventPositionRelationFeaturesExtractor implements RelationFeaturesExtractor {
+
+  // The spelling is kept as is: changing it would change the emitted feature names.
+  private static final String FEATURE_NAME = "EventRelaionPosition";
+
+  /**
+   * Extracts document-position features for every argument that is an event mention.
+   *
+   * @param jCas the CAS holding sentences, segments and event mentions
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return the position features; empty when neither argument is an event mention
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> features = Lists.newArrayList();
+
+    List<EventMention> events = Lists.newArrayList();
+    if (arg1 instanceof EventMention) {
+      events.add((EventMention) arg1);
+    }
+    if (arg2 instanceof EventMention) {
+      events.add((EventMention) arg2);
+    }
+    // Nothing to describe: skip the document-wide lookups below.
+    if (events.isEmpty()) {
+      return features;
+    }
+
+    List<Sentence> sentList = new ArrayList<>(JCasUtil.select(jCas, Sentence.class));
+    int sentSize = sentList.size();
+
+    // Segments covering each event mention.
+    Map<EventMention, Collection<Segment>> coveringMap =
+        JCasUtil.indexCovering(jCas, EventMention.class, Segment.class);
+
+    List<EventMention> eventList = keepExactEventMentions(JCasUtil.select(jCas, EventMention.class));
+    int eventSize = eventList.size();
+
+    for (EventMention event : events) {
+      // Is the event inside the span of the first / last five sentences?
+      if (sentSize >= 5) {
+        if (containEvent(sentList.get(0), sentList.get(4), event)) {
+          features.add(new Feature(FEATURE_NAME, "Within_Top_5_sentences"));
+        }
+        if (containEvent(sentList.get(sentSize - 5), sentList.get(sentSize - 1), event)) {
+          features.add(new Feature(FEATURE_NAME, "Within_Last_5_sentences"));
+        }
+      }
+
+      // Is the event inside the span of the first / last five events?
+      if (eventSize >= 5) {
+        if (containEvent(eventList.get(0), eventList.get(4), event)) {
+          features.add(new Feature(FEATURE_NAME, "Within_Top_5_events"));
+        }
+        if (containEvent(eventList.get(eventSize - 5), eventList.get(eventSize - 1), event)) {
+          features.add(new Feature(FEATURE_NAME, "Within_Last_5_events"));
+        }
+      }
+
+      Collection<Segment> segList = coveringMap.get(event);
+      if (segList == null) {
+        // No covering segment recorded for this event.
+        continue;
+      }
+
+      // Is the event inside the span of the first / last three events of its section?
+      for (Segment seg : segList) {
+        String segname = seg.getId();
+        // Segments without an id and the catch-all simple segment are ignored.
+        if (segname == null || segname.equals("SIMPLE_SEGMENT")) {
+          continue;
+        }
+        List<EventMention> segEvents =
+            keepExactEventMentions(JCasUtil.selectCovered(jCas, EventMention.class, seg));
+        int segEventSize = segEvents.size();
+        if (segEventSize >= 3) {
+          if (containEvent(segEvents.get(0), segEvents.get(2), event)) {
+            features.add(new Feature(FEATURE_NAME, "Within_Top_3_events_of" + segname));
+          }
+          if (containEvent(segEvents.get(segEventSize - 3), segEvents.get(segEventSize - 1), event)) {
+            features.add(new Feature(FEATURE_NAME, "Within_Last_3_events_of" + segname));
+          }
+        }
+      }
+    }
+
+    return features;
+  }
+
+  /** Returns, in iteration order, the mentions whose class is exactly EventMention (subclasses dropped). */
+  private static List<EventMention> keepExactEventMentions(Collection<EventMention> mentions) {
+    List<EventMention> kept = new ArrayList<>();
+    for (EventMention mention : mentions) {
+      if (mention.getClass().equals(EventMention.class)) {
+        kept.add(mention);
+      }
+    }
+    return kept;
+  }
+
+  /**
+   * Tells whether {@code target} lies within the span that runs from the
+   * beginning of {@code arg1} to the end of {@code arg2}.
+   */
+  private static boolean containEvent(Annotation arg1, Annotation arg2,
+      Annotation target) {
+    return target.getBegin() >= arg1.getBegin() && target.getEnd() <= arg2.getEnd();
+  }
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationFeatureExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationFeatureExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.util.ViewUriUtil;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+/**
+ * Given a pair of arguments, if any one of them is an event, get the related event-sectiontime as features
+ * @author CH151862
+ *
+ */
+public class EventTimeRelationFeatureExtractor implements
+RelationFeaturesExtractor {
+
+ private String cachedDocID = null;
+ private Multimap<EventMention, String> eventSectionTimeRelationLookup;
+
+ @SuppressWarnings("null")
+ @Override
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+
+ String docId=null;
+ try{
+ docId = ViewUriUtil.getURI(jCas).toString();// get docID
+ }catch(Exception e){
+ docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+ }
+ if(!docId.equals(cachedDocID)){
+ // rebuild cache
+ cachedDocID = docId;
+ rebuildCache(jCas);
+ }
+
+ ArrayList<Feature> feats = new ArrayList<>();
+
+ List<EventMention> events = Lists.newArrayList();
+ if(arg1 instanceof EventMention) events.add((EventMention)arg1);
+ if(arg2 instanceof EventMention) events.add((EventMention)arg2);
+ for(EventMention event: events){
+ for(String value : eventSectionTimeRelationLookup.get(event)){
+ feats.add(new Feature("hasEventTimeRelation_", value));
+ }
+ }
+
+ return feats;
+ }
+
+ private void rebuildCache(JCas jCas){
+ //get admission Time:
+ TimeMention admissionTime = null;
+ //may need better way to identify Discharge Time other than relative span information:
+ for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, 15, 30)) {
+ if(time.getTimeClass().equals("DATE")){
+ admissionTime = time;
+ break;
+ }
+ }
+ //get discharge Time id: T1:
+ TimeMention dischargeTime = null;
+ //may need better way to identify Discharge Time other than relative span information:
+ for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, 40, 60)) {
+ if(time.getTimeClass().equals("DATE")){
+ dischargeTime = time;
+ break;
+ }
+ }
+
+ eventSectionTimeRelationLookup = HashMultimap.create();
+ for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
+// Annotation potentialSectTime = relation.getArg2().getArgument();
+// String relationTo = "";
+// if(potentialSectTime==admissionTime){
+// relationTo="admissionTime";
+// }else if(potentialSectTime == dischargeTime){
+// relationTo="dischargeTime";
+// }
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ Annotation event = null;
+ TimeMention time = null;
+ if( arg1 instanceof EventMention && arg2 instanceof TimeMention){
+ event = arg1;
+ time = (TimeMention)arg2;
+ }else if(arg2 instanceof EventMention && arg1 instanceof TimeMention){
+ event = arg2;
+ time = (TimeMention)arg1;
+ }
+ if(event != null && time !=null){
+ if(time == admissionTime){
+ eventSectionTimeRelationLookup.put((EventMention)event, "admissionTime_"+relation.getCategory());
+ }else if(time == dischargeTime){
+ eventSectionTimeRelationLookup.put((EventMention)event, "dischargeTime_"+relation.getCategory());
+ }else{
+ eventSectionTimeRelationLookup.put((EventMention)event, time.getTimeClass()+"_"+relation.getCategory());
+ }
+// Annotation potentialEvent = relation.getArg1().getArgument();
+// if(potentialEvent instanceof EventMention){
+// eventSectionTimeRelationLookup.put((EventMention)potentialEvent, relationTo+"_"+relation.getCategory());
+// }
+ }
+ }
+
+ }
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseRelationExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseRelationExtractor.java?rev=1632167&r1=1632166&r2=1632167&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseRelationExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseRelationExtractor.java Wed Oct 15 19:29:44 2014
@@ -38,14 +38,14 @@ public class NearbyVerbTenseRelationExtr
@Override
public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
- List<Feature> feats = new ArrayList<Feature>();
+ List<Feature> feats = new ArrayList<>();
//find event
- EventMention event = null;
+ List<EventMention> events = new ArrayList<>();
if(arg1 instanceof EventMention){
- event = (EventMention) arg1;
- }else if(arg1 instanceof EventMention){
- event = (EventMention) arg2;
+ events.add((EventMention) arg1);
+ }else if(arg2 instanceof EventMention){
+ events.add((EventMention) arg2);
}else{
return feats;
}
@@ -53,25 +53,40 @@ public class NearbyVerbTenseRelationExtr
//1 get covering sentence:
Map<EventMention, Collection<Sentence>> coveringMap =
JCasUtil.indexCovering(jcas, EventMention.class, Sentence.class);
- Collection<Sentence> sentList = coveringMap.get(event);
-
- //2 get Verb Tense
- if (sentList != null && !sentList.isEmpty()){
- for(Sentence sent : sentList) {
- String verbTP ="";
- for ( WordToken wt : JCasUtil.selectCovered(jcas, WordToken.class, sent)) {
- if (wt != null){
- String pos = wt.getPartOfSpeech();
- if (pos.startsWith("VB")){
- verbTP = verbTP + "_" + pos;
+
+ Sentence knowSentence = null;
+ String seenVbPattern = null;
+
+ for(EventMention event: events){
+ Collection<Sentence> sentList = coveringMap.get(event);
+
+ //2 get Verb Tense
+ if (sentList != null && !sentList.isEmpty()){
+ for(Sentence sent : sentList) {
+ String verbTP ="";
+ for ( WordToken wt : JCasUtil.selectCovered(jcas, WordToken.class, sent)) {
+ if (wt != null){
+ String pos = wt.getPartOfSpeech();
+ if (pos.startsWith("VB")){
+ verbTP = verbTP + "_" + pos;
+ }
}
}
+ Feature feature = new Feature("VerbTenseFeature", verbTP);
+ feats.add(feature);
+ //logger.info("found nearby verb's pos tag: "+ verbTP);
+
+ //check if the verb pattern is different from the old
+ if(knowSentence == null && !verbTP.equals("")){
+ knowSentence = sent;
+ seenVbPattern = verbTP;
+ }else if(knowSentence != null && knowSentence != sent && verbTP.equals(seenVbPattern)){
+ feature = new Feature("TwoSentenceShareTheSameVerbPattern", verbTP);
+ feats.add(feature);
+ }
}
- Feature feature = new Feature("VerbTenseFeature", verbTP);
- feats.add(feature);
- //logger.info("found nearby verb's pos tag: "+ verbTP);
- }
+ }
}
return feats;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventTimeBetweenCandidatesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventTimeBetweenCandidatesExtractor.java?rev=1632167&r1=1632166&r2=1632167&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventTimeBetweenCandidatesExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventTimeBetweenCandidatesExtractor.java Wed Oct 15 19:29:44 2014
@@ -22,14 +22,23 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+/**
+ * Count the number of EventMention and TimeMention in between two arguments as features
+ * @author CH151862
+ *
+ */
public class NumberOfEventTimeBetweenCandidatesExtractor implements
RelationFeaturesExtractor {
@@ -55,21 +64,46 @@ RelationFeaturesExtractor {
int eventsInBetween = 0;
int timesInBetween = 0;
+ int wordsInBetween = 0;
+ int puncsInBetween = 0;
+// int numsInBetween = 0;
+// int newlineInBetween= 0;
List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, begin, end);
List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, begin, end);
+ List<WordToken> words = JCasUtil.selectCovered(jCas, WordToken.class, begin, end);
+ List<PunctuationToken>punc= JCasUtil.selectCovered(jCas, PunctuationToken.class, begin, end);
+// List<NumToken> numTokens = JCasUtil.selectCovered(jCas, NumToken.class, begin, end);
+// List<NewlineToken> newline= JCasUtil.selectCovered(jCas, NewlineToken.class, begin, end);
+
+ //filter out ctakes events
+// List<EventMention> realEvents = new ArrayList<>();
+// for(EventMention event : events){
+// // filter out ctakes events
+// if(event.getClass().equals(EventMention.class)){
+// realEvents.add(event);
+// }
+// }
+// events = realEvents;
eventsInBetween = events==null? 0: events.size();
+// if(eventsInBetween > 10) eventsInBetween = 10;
timesInBetween = times==null? 0: times.size();
+ wordsInBetween = words==null? 0 : words.size();
+ if(wordsInBetween > 20) wordsInBetween = 20;
+ puncsInBetween = punc == null? 0 : punc.size();
+ if(puncsInBetween > 5) puncsInBetween = 5;
+// numsInBetween = numTokens == null? 0 : numTokens.size();
+// newlineInBetween= newline == null? 0 : newline.size();
+
+
feats.add(new Feature("NumOfEvents_InBetween", eventsInBetween));
feats.add(new Feature("NumOfTimes_InBetween", timesInBetween));
feats.add(new Feature("NumOfEventsAndTimes_InBetween", timesInBetween+eventsInBetween));
+ feats.add(new Feature("NumOfWords_InBetween", wordsInBetween));
+ feats.add(new Feature("NumOfPunctuation_InBetween", puncsInBetween));
+// feats.add(new Feature("NumOfNumberToken_InBetween", numsInBetween));
+// feats.add(new Feature("NumOfNewLine_InBetween", newlineInBetween));
-// //print long distances
-// if (eventsInBetween >= 200){
-// System.out.println("++++++++++Long Distance Relation in "+ ViewUriUtil.getURI(jCas).toString() + "+++++++");
-// System.out.println("["+arg1.getCoveredText()+"] "+ jCas.getDocumentText().substring(arg1.getEnd(), arg2.getBegin()) +" ["+arg2.getCoveredText()+"]");
-// }
-
return feats;
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventsInTheSameSentenceExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventsInTheSameSentenceExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventsInTheSameSentenceExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NumberOfEventsInTheSameSentenceExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * Count the number of EventMentions in the same sentence as the two arguments, used for features for within sentence event-event relation discovery
+ * @author CH151862
+ *
+ */
+public class NumberOfEventsInTheSameSentenceExtractor implements
+RelationFeaturesExtractor {
+
+    /**
+     * Emits features describing how far apart the two arguments are within the
+     * event list of each sentence covering {@code arg1}. Only annotations whose
+     * runtime class is exactly {@link EventMention} are counted. When both
+     * arguments are found, adds {@code EventPair_Distance} (index difference),
+     * {@code EventPair_} = "Major" if the distance equals the event count minus
+     * one, and {@code EventPair_} = "Consecutive" if the distance is one.
+     *
+     * @param jCas the CAS holding the sentence and event annotations
+     * @param arg1 first relation argument; its covering sentences are searched
+     * @param arg2 second relation argument, assumed to share a sentence with {@code arg1}
+     * @return the extracted features; empty when no sentence covers {@code arg1}
+     */
+    @Override
+    public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+            IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+        List<Feature> feats = new ArrayList<>();
+        Map<IdentifiedAnnotation, Collection<Sentence>> coveringMap =
+                JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Sentence.class);
+        Collection<Sentence> sentList = coveringMap.get(arg1);
+        // Must be ||: with && a null list would still be dereferenced by isEmpty().
+        if (sentList == null || sentList.isEmpty()) {
+            return feats;
+        }
+        for (Sentence sent : sentList) {
+            // Filter out cTAKES events: keep exact EventMention instances only.
+            List<EventMention> events = Lists.newArrayList();
+            for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sent)) {
+                if (event.getClass().equals(EventMention.class)) {
+                    events.add(event);
+                }
+            }
+            int eventsNum = events.size();
+            // Locate both arguments (by identity) in the sentence's event list.
+            int arg1Index = -1;
+            int arg2Index = -1;
+            for (int i = 0; i < eventsNum; i++) {
+                EventMention currentEvent = events.get(i);
+                if (currentEvent == arg1) {
+                    arg1Index = i;
+                }
+                if (currentEvent == arg2) {
+                    arg2Index = i;
+                }
+                if (arg1Index != -1 && arg2Index != -1) {
+                    break;
+                }
+            }
+            if (arg1Index == -1 || arg2Index == -1) {
+                System.out.println("Same sentence events matching error.");
+                continue;
+            }
+            // Earlier experiments (total event count, raw argument indices,
+            // first/last-event flags) were tried here and were not helpful.
+            int dis = Math.abs(arg1Index - arg2Index);
+            feats.add(new Feature("EventPair_Distance", dis));
+            if (dis == eventsNum - 1) {
+                feats.add(new Feature("EventPair_", "Major"));
+            }
+            if (dis == 1) {
+                feats.add(new Feature("EventPair_", "Consecutive"));
+            }
+        }
+        return feats;
+    }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/OverlappedHeadFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/OverlappedHeadFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/OverlappedHeadFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/OverlappedHeadFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,79 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+/**
+ * Extract the overlapping head words of two arguments. Head words: the NNs of NP + the VBs of VP
+ * @author CH151862
+ *
+ */
+public class OverlappedHeadFeaturesExtractor extends TokenFeaturesExtractor {
+
+    /**
+     * Compares the head words (tokens whose part of speech starts with NN or VB)
+     * of the two mentions and emits one feature triple per pair of heads that
+     * share a canonical form, plus match-count and ratio features when at
+     * least one pair matches.
+     *
+     * @param jCas the CAS holding the word tokens
+     * @param mention1 first argument
+     * @param mention2 second argument
+     * @return the overlap features; empty when no head words match
+     */
+    @Override
+    public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1, IdentifiedAnnotation mention2)
+            throws AnalysisEngineProcessException {
+        List<Feature> features = new ArrayList<>();
+        String featName = "overlappingHeadTerms";
+
+        // Collect each argument's heads once, so the sizes used for the ratios
+        // are true per-argument counts rather than counts inflated by nesting.
+        List<WordToken> heads1 = selectHeadTokens(jCas, mention1);
+        List<WordToken> heads2 = selectHeadTokens(jCas, mention2);
+
+        int matches = 0;
+        for (WordToken t1 : heads1) {
+            String t1str = t1.getCanonicalForm();
+            for (WordToken t2 : heads2) {
+                // A head without a canonical form can never match.
+                if (t1str != null && t1str.equals(t2.getCanonicalForm())) {
+                    // "CanoticalForm" is misspelled but kept: the name is part of the emitted features.
+                    features.add(new Feature(featName + "_CanoticalForm", t1str));
+                    features.add(new Feature(featName + "_length", t1str.length()));
+                    features.add(new Feature(featName + "_POS", t1.getPartOfSpeech()));
+                    matches++;
+                }
+            }
+        }
+        if (matches > 0) {
+            // matches > 0 implies both head lists are non-empty, so no division by zero.
+            int headSize = Math.min(heads1.size(), heads2.size());
+            int longHeadSize = Math.max(heads1.size(), heads2.size());
+            features.add(new Feature(featName + "_count", matches));
+            features.add(new Feature(featName + "_shortRatio", (float) matches / headSize));
+            features.add(new Feature(featName + "_longRatio", (float) matches / longHeadSize));
+        }
+        return features;
+    }
+
+    /** Returns the word tokens covered by {@code annotation} whose POS tag starts with NN or VB. */
+    private static List<WordToken> selectHeadTokens(JCas jCas, Annotation annotation) {
+        List<WordToken> heads = new ArrayList<>();
+        for (WordToken token : JCasUtil.selectCovered(jCas, WordToken.class, annotation)) {
+            String pos = token.getPartOfSpeech();
+            if (pos != null && (pos.startsWith("NN") || pos.startsWith("VB"))) {
+                heads.add(token);
+            }
+        }
+        return heads;
+    }
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderRelationExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderRelationExtractor.java?rev=1632167&r1=1632166&r2=1632167&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderRelationExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderRelationExtractor.java Wed Oct 15 19:29:44 2014
@@ -32,36 +32,71 @@ import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
import org.apache.uima.fit.util.JCasUtil;
+import com.google.common.collect.Lists;
+
public class SectionHeaderRelationExtractor implements RelationFeaturesExtractor{
@Override
public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
- List<Feature> feats = new ArrayList<Feature>();
+ List<Feature> feats = new ArrayList<>();
//find event
- EventMention event = null;
+ EventMention eventA = null;
+ EventMention eventB = null;
if(arg1 instanceof EventMention){
- event = (EventMention) arg1;
- }else if(arg1 instanceof EventMention){
- event = (EventMention) arg2;
- }else{
+ eventA = (EventMention) arg1;
+ }
+ if(arg2 instanceof EventMention){
+ eventB = (EventMention) arg2;
+ }
+
+ if(eventA==null && eventB==null){
return feats;
}
//get covering segment set:
Map<EventMention, Collection<Segment>> coveringMap =
JCasUtil.indexCovering(jcas, EventMention.class, Segment.class);
- Collection<Segment> segList = coveringMap.get(event);
-
+ List<Segment> segListA = Lists.newArrayList();
+ List<Segment> segListB = Lists.newArrayList();
+ if(eventA != null){
+ for ( Segment seg : coveringMap.get(eventA)){
+ if (!seg.getId().equals("SIMPLE_SEGMENT")){//remove simple segment
+ segListA.add(seg);
+ }
+ }
+ }
+ if(eventB != null){
+ for ( Segment seg : coveringMap.get(eventB)){
+ if (!seg.getId().equals("SIMPLE_SEGMENT")){//remove simple segment
+ segListB.add(seg);
+ }
+ }
+ }
+
//get segment id
- if (segList != null && !segList.isEmpty()){
- for(Segment seg : segList) {
- String segname = seg.getId();
- Feature feature = new Feature("SegmentID", segname);
- feats.add(feature);
+ List<String> segANames = Lists.newArrayList();
+ List<String> segBNames = Lists.newArrayList();
+ for(Segment seg : segListA) {
+ String segname = seg.getId();
+ Feature feature = new Feature("SegmentID_arg1", segname);
+ feats.add(feature);
+ segANames.add(segname);
+ }
+ for(Segment seg : segListB) {
+ String segname = seg.getId();
+ Feature feature = new Feature("SegmentID_arg2", segname);
+ feats.add(feature);
+ segBNames.add(segname);
+ }
+ for(String segA : segANames){
+ for(String segB : segBNames){
+ if(segA.equals(segB)){
+ Feature feature = new Feature("InTheSameSegment_", segA);
+ feats.add(feature);
+ }
}
-
}
return feats;
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SpecialAnnotationRelationExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SpecialAnnotationRelationExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SpecialAnnotationRelationExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SpecialAnnotationRelationExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+public class SpecialAnnotationRelationExtractor implements RelationFeaturesExtractor{
+
+    /**
+     * Emits features about range, fraction, roman-numeral and measurement
+     * annotations found in the sentences covering the two arguments.
+     * For each argument that is an {@link EventMention}, one
+     * {@code arg1_has_nearby_} / {@code arg2_has_nearby_} feature is added per
+     * annotation type present in the first sentence covering that argument.
+     * When both arguments are events located in different sentences, a
+     * {@code shareCommon*_} feature is added for each annotation type for which
+     * the two sentences contain annotations with the same covered text
+     * (compared ignoring case).
+     *
+     * @param jcas the CAS holding the sentence and special annotations
+     * @param arg1 first relation argument
+     * @param arg2 second relation argument
+     * @return the extracted features; empty when neither argument is an event
+     */
+    @Override
+    public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+            IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+        List<Feature> feats = new ArrayList<>();
+        // Test the two arguments independently: an if/else-if chain would never
+        // look at arg2 once arg1 is an event, so both events could never be set
+        // together and the cross-sentence comparison below could never run.
+        EventMention eventA = arg1 instanceof EventMention ? (EventMention) arg1 : null;
+        EventMention eventB = arg2 instanceof EventMention ? (EventMention) arg2 : null;
+        if (eventA == null && eventB == null) {
+            return feats;
+        }
+
+        //1 get covering sentence:
+        Map<EventMention, Collection<Sentence>> coveringMap =
+                JCasUtil.indexCovering(jcas, EventMention.class, Sentence.class);
+        Sentence sentenceA = getCoveringSentence(eventA, coveringMap);
+        Sentence sentenceB = getCoveringSentence(eventB, coveringMap);
+
+        //get all special annotation that is in sentence A
+        List<RangeAnnotation> rangeInA = selectNearby(jcas, RangeAnnotation.class, sentenceA, "arg1_has_nearby_", feats);
+        List<FractionAnnotation> fracInA = selectNearby(jcas, FractionAnnotation.class, sentenceA, "arg1_has_nearby_", feats);
+        List<RomanNumeralAnnotation> romanInA = selectNearby(jcas, RomanNumeralAnnotation.class, sentenceA, "arg1_has_nearby_", feats);
+        List<MeasurementAnnotation> measureInA = selectNearby(jcas, MeasurementAnnotation.class, sentenceA, "arg1_has_nearby_", feats);
+
+        //get all special annotation that is in sentence B
+        List<RangeAnnotation> rangeInB = selectNearby(jcas, RangeAnnotation.class, sentenceB, "arg2_has_nearby_", feats);
+        List<FractionAnnotation> fracInB = selectNearby(jcas, FractionAnnotation.class, sentenceB, "arg2_has_nearby_", feats);
+        List<RomanNumeralAnnotation> romanInB = selectNearby(jcas, RomanNumeralAnnotation.class, sentenceB, "arg2_has_nearby_", feats);
+        List<MeasurementAnnotation> measureInB = selectNearby(jcas, MeasurementAnnotation.class, sentenceB, "arg2_has_nearby_", feats);
+
+        //check if annotation matches
+        if (sentenceA != null && sentenceB != null && sentenceA != sentenceB) {
+            addSharedTextFeature(rangeInA, rangeInB, "shareCommonRange_", feats);
+            addSharedTextFeature(fracInA, fracInB, "shareCommonFraction_", feats);
+            // Roman numerals and measurements get their own names so they cannot be mistaken for a fraction.
+            addSharedTextFeature(romanInA, romanInB, "shareCommonRomanNumeral_", feats);
+            addSharedTextFeature(measureInA, measureInB, "shareCommonMeasurement_", feats);
+        }
+        return feats;
+    }
+
+    /**
+     * Collects the annotations of {@code type} covered by {@code sentence} and,
+     * if there is at least one, adds a feature named {@code featureName} whose
+     * value is the simple class name of {@code type}.
+     * {@code Annotation} is fully qualified because this file does not import it.
+     *
+     * @param jcas the CAS to search
+     * @param type annotation type to look for
+     * @param sentence sentence to search in; may be {@code null}
+     * @param featureName name of the presence feature
+     * @param feats list the presence feature is appended to
+     * @return the covered annotations; empty when {@code sentence} is {@code null}
+     */
+    private static <T extends org.apache.uima.jcas.tcas.Annotation> List<T> selectNearby(JCas jcas,
+            Class<T> type, Sentence sentence, String featureName, List<Feature> feats) {
+        List<T> found = new ArrayList<>();
+        if (sentence != null) {
+            found.addAll(JCasUtil.selectCovered(jcas, type, sentence));
+            if (!found.isEmpty()) {
+                feats.add(new Feature(featureName, type.getSimpleName()));
+            }
+        }
+        return found;
+    }
+
+    /**
+     * Adds a single feature named {@code featureName}, valued with the
+     * lower-cased covered text, for the first pair of annotations (in iteration
+     * order) whose covered texts are equal ignoring case. Adds nothing when no
+     * pair matches.
+     */
+    private static <T extends org.apache.uima.jcas.tcas.Annotation> void addSharedTextFeature(
+            List<T> inA, List<T> inB, String featureName, List<Feature> feats) {
+        for (T a : inA) {
+            for (T b : inB) {
+                if (a.getCoveredText().equalsIgnoreCase(b.getCoveredText())) {
+                    feats.add(new Feature(featureName, b.getCoveredText().toLowerCase()));
+                    return;
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the first sentence covering {@code event}, or {@code null} when
+     * {@code event} is {@code null} or no covering sentence is indexed for it.
+     */
+    private static Sentence getCoveringSentence(EventMention event,
+            Map<EventMention, Collection<Sentence>> coveringMap) {
+        if (event == null) {
+            return null;
+        }
+        Collection<Sentence> sentList = coveringMap.get(event);
+        if (sentList == null || sentList.isEmpty()) {
+            return null;
+        }
+        return sentList.iterator().next();
+    }
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETFlatExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETFlatExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETFlatExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETFlatExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,175 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+public class TemporalPETFlatExtractor implements RelationFeaturesExtractor {
+
+    private static final String FEAT_NAME = "Flat_DPK";
+
+    /**
+     * Builds the tree spanning the two arguments and emits the downward
+     * category paths found in it as {@code Flat_DPK} features.
+     * <p>
+     * Both arguments are inserted into the tree returned by
+     * {@code AnnotationTreeUtils.getTreeCopy} for {@code arg1}, as nodes labelled
+     * {@code ARG1-<type>} / {@code ARG2-<type>}, where the type is
+     * {@code EVENT-<modality>} or {@code TIMEX-<class>}. If one inserted node
+     * encloses the other, that node's subtree is used; otherwise the
+     * path-enclosed tree between the two nodes is used.
+     *
+     * @param jcas the CAS holding the parse trees
+     * @param arg1 first relation argument
+     * @param arg2 second relation argument
+     * @return the path features, or a single placeholder feature when no tree is available
+     */
+    @Override
+    public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+            IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+        List<Feature> features = new ArrayList<>();
+        // first get the root of the tree associated with arg1
+        TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jcas, AnnotationTreeUtils.getAnnotationTree(jcas, arg1));
+
+        if (root == null) {
+            // No tree available: fall back to a fixed placeholder tree.
+            SimpleTree fakeTree = new SimpleTree("(S (NN null))");
+            features.add(new Feature(FEAT_NAME, fakeTree.toString()));
+            return features;
+        }
+
+        // swap the order if necessary:
+        if (arg2.getBegin() <= arg1.getBegin() && arg2.getEnd() <= arg1.getEnd()) {
+            IdentifiedAnnotation temp = arg1;
+            arg1 = arg2;
+            arg2 = temp;
+        }
+
+        // Each label is derived from its own argument only, so an event without
+        // properties does not inherit the modality read from the other argument.
+        TreebankNode t1 = AnnotationTreeUtils.insertAnnotationNode(jcas, root, arg1, "ARG1-" + describeArgumentType(arg1));
+        TreebankNode t2 = AnnotationTreeUtils.insertAnnotationNode(jcas, root, arg2, "ARG2-" + describeArgumentType(arg2));
+
+        // Inserting the other time expressions is currently disabled:
+        // addOtherTimes(jcas,root, arg1, arg2);
+
+        SimpleTree tree;
+        if (t1.getBegin() <= t2.getBegin() && t1.getEnd() >= t2.getEnd()) {
+            // t1 encloses t2
+            tree = TreeExtractor.getSimpleClone(t1);
+        } else if (t2.getBegin() <= t1.getBegin() && t2.getEnd() >= t1.getEnd()) {
+            // t2 encloses t1
+            tree = TreeExtractor.getSimpleClone(t2);
+        } else {
+            tree = TreeExtractor.extractPathEnclosedTree(t1, t2, jcas);
+        }
+
+        tree.setGeneralizeLeaf(true);
+        moveTimexDownToNP(tree);
+        simplifyGCG(tree);
+
+        features.addAll(traverseTreeForDPath(tree));
+        return features;
+    }
+
+    /**
+     * Returns the type suffix used in an argument's tree label:
+     * {@code EVENT-<contextual modality>} for an event (modality left empty when
+     * the event or its properties are missing), {@code TIMEX-<time class>} for a
+     * time expression, and the empty string for anything else.
+     */
+    private static String describeArgumentType(IdentifiedAnnotation arg) {
+        if (arg instanceof EventMention) {
+            EventMention mention = (EventMention) arg;
+            String eventModality = "";
+            if (mention.getEvent() != null && mention.getEvent().getProperties() != null) {
+                eventModality = mention.getEvent().getProperties().getContextualModality();
+            }
+            return "EVENT-" + eventModality;
+        }
+        if (arg instanceof TimeMention) {
+            return "TIMEX-" + ((TimeMention) arg).getTimeClass();
+        }
+        return "";
+    }
+
+    /**
+     * Lists, as features, the downward category paths that start at
+     * {@code tree}'s root or at any node below it. A path of length 0 is the
+     * node's category alone; path segments are joined with '-'.
+     */
+    private List<Feature> traverseTreeForDPath(SimpleTree tree) {
+        List<Feature> features = new ArrayList<>();
+        String rootStr = tree.cat;
+        features.add(new Feature(FEAT_NAME, rootStr));//add length 0 DPK
+        if (tree.children.size() == 1 && tree.children.get(0).children.size() == 0) {//if tree is a leaf
+            features.add(new Feature(FEAT_NAME, rootStr + "-" + tree.children.get(0).cat));
+        } else {//if tree is not a leaf
+            for (SimpleTree subtree : tree.children) {
+                features.addAll(traverseTreeForDPath(subtree));
+                for (String str : getSubTreeStrings(subtree)) {
+                    features.add(new Feature(FEAT_NAME, rootStr + "-" + str));
+                }
+            }
+        }
+        return features;
+    }
+
+    /**
+     * Returns every downward category path that starts at {@code subtree}'s
+     * root, including the root category on its own.
+     */
+    private List<String> getSubTreeStrings(SimpleTree subtree) {
+        List<String> subTreeStrings = new ArrayList<>();
+        subTreeStrings.add(subtree.cat);
+        if (subtree.children.size() == 1 && subtree.children.get(0).children.size() == 0) {//if subtree is a leaf
+            subTreeStrings.add(subtree.cat + "-" + subtree.children.get(0).cat);
+        } else { //if subtree is not a leaf
+            for (SimpleTree subsubTree : subtree.children) {
+                for (String str : getSubTreeStrings(subsubTree)) {
+                    subTreeStrings.add(subtree.cat + "-" + str);
+                }
+            }
+        }
+        return subTreeStrings;
+    }
+
+    /**
+     * Inserts a {@code TIMEX} node into {@code root} for each time expression
+     * under {@code root} that has different offsets from both arguments, begins
+     * after {@code arg1} begins and ends no later than {@code arg2} begins.
+     */
+    public static void addOtherTimes(JCas jcas, TopTreebankNode root, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+        List<TimeMention> timexes = JCasUtil.selectCovered(TimeMention.class, root);
+
+        for (TimeMention timex : timexes) {
+            // don't want the same timex that we are looking at the argument for...
+            if (timex.getBegin() == arg1.getBegin() && timex.getEnd() == arg1.getEnd() ||
+                    timex.getBegin() == arg2.getBegin() && timex.getEnd() == arg2.getEnd()) continue;
+            // but make sure it is in the correct span...
+            if (timex.getBegin() > arg1.getBegin() && timex.getEnd() <= arg2.getBegin()) {
+                AnnotationTreeUtils.insertAnnotationNode(jcas, root, timex, "TIMEX");
+            }
+        }
+    }
+
+    /**
+     * Finds the first node on each branch whose category contains
+     * {@code -TIMEX-}. If its first child is a {@code PP} made of exactly an
+     * {@code IN} followed by an {@code NP*}, the labels are swapped and the
+     * preposition is re-attached so the result reads {@code PP -> IN (TIMEX -> NP)}.
+     * Nodes without children are left untouched.
+     */
+    public static void moveTimexDownToNP(SimpleTree tree) {
+        // Checked first so a childless TIMEX node cannot trigger children.get(0).
+        if (tree.children == null || tree.children.isEmpty()) {
+            return;
+        }
+        if (tree.cat.contains("-TIMEX-")) {
+            SimpleTree child = tree.children.get(0);
+            // we've found the correct node: only correct clear violations: PP -> IN NP
+            if (child.cat.contains("PP") && child.children.size() == 2 && child.children.get(0).cat.equals("IN") && child.children.get(1).cat.startsWith("NP")) {
+                // swap labels
+                String fullCat = tree.cat;
+                tree.cat = "PP";
+                child.cat = fullCat;
+
+                // now point the new PP at the preposition and stop the new TIMEX from pointing at it:
+                tree.children.add(0, child.children.get(0));
+                child.children.remove(0);
+            }
+            return;
+        }
+        // not a TIMEX node: keep searching below
+        for (SimpleTree child : tree.children) {
+            moveTimexDownToNP(child);
+        }
+    }
+
+    /**
+     * Strips everything from the first '+' onwards from the category of every
+     * node that has children; nodes without children are left untouched.
+     */
+    public static void simplifyGCG(SimpleTree tree){
+        if (tree.children == null || tree.children.size() == 0) return;
+
+        int plusInd = tree.cat.indexOf('+');
+        if (plusInd > 0) {
+            tree.cat = tree.cat.substring(0, plusInd);
+        }
+        for (SimpleTree child : tree.children) {
+            simplifyGCG(child);
+        }
+    }
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXPropertyRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXPropertyRelationFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXPropertyRelationFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXPropertyRelationFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * Check is an timex argument is in the begining/end of a sentence
+ * @author CH151862
+ *
+ */
+public class TimeXPropertyRelationFeaturesExtractor implements RelationFeaturesExtractor {
+
+ private String name="TimeXProperty";
+
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+ List<Feature> features = Lists.newArrayList();
+
+ List<TimeMention> times = Lists.newArrayList();
+ if(arg1 instanceof TimeMention) times.add((TimeMention)arg1);
+ if(arg2 instanceof TimeMention) times.add((TimeMention)arg2);
+
+ Map<TimeMention, Collection<Sentence>> coveringMap =
+ JCasUtil.indexCovering(jCas, TimeMention.class, Sentence.class);
+
+ for(TimeMention time : times){
+ Collection<Sentence> sentList = coveringMap.get(time);
+
+ //get time class
+ String timeclass = time.getTimeClass();
+ if(timeclass != null) features.add(new Feature("TimeClass", timeclass));
+
+ //check if it contains only one word
+ List<WordToken> timewords = Lists.newArrayList();
+ timewords.addAll(JCasUtil.selectCovered(jCas, WordToken.class, time));
+ if(timewords.size()==1){
+ features.add(new Feature(name, "one_word"));
+ }
+
+ //get position in sentence
+ for(Sentence sent : sentList) {
+ if(sent.getBegin()==time.getBegin() && time.getEnd()>=(sent.getEnd()-2)){
+ features.add(new Feature(name, "entire_sentence"));
+ }else if(sent.getBegin()==time.getBegin()){
+ features.add(new Feature(name, "beginning_of_sentence"));
+ }else if(time.getEnd()>=(sent.getEnd()-2)){//allow a sentence to have a space and a period.
+ features.add(new Feature(name, "end_of_sentence"));
+ }
+ }
+ }
+
+ return features;
+ }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXRelationFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXRelationFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXRelationFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * extract the nearby nearest time, date information for any event
+ * @author CH151862
+ *
+ */
+public class TimeXRelationFeaturesExtractor implements RelationFeaturesExtractor {
+
+ private String name="TimeXRelatioinFeature";
+
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+ List<Feature> features = Lists.newArrayList();
+
+ List<EventMention> events = Lists.newArrayList();
+ if(arg1 instanceof EventMention) events.add((EventMention)arg1);
+ if(arg2 instanceof EventMention) events.add((EventMention)arg2);
+
+ Map<EventMention, Collection<Sentence>> coveringMap =
+ JCasUtil.indexCovering(jCas, EventMention.class, Sentence.class);
+
+ for(EventMention event : events){
+ Collection<Sentence> sentList = coveringMap.get(event);
+
+ //2 get TimeX
+ Map<Integer, IdentifiedAnnotation> timeDistMap = new TreeMap<>();
+
+ for(Sentence sent : sentList) {
+ for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, sent)) {
+ timeDistMap.put(Math.abs(time.getBegin() - event.getBegin()), time);
+ }
+ for (TimeAnnotation time : JCasUtil.selectCovered(jCas, TimeAnnotation.class, sent)) {
+ timeDistMap.put(Math.abs(time.getBegin() - event.getBegin()), time);
+ }
+ for (DateAnnotation time : JCasUtil.selectCovered(jCas, DateAnnotation.class, sent)) {
+ timeDistMap.put(Math.abs(time.getBegin() - event.getBegin()), time);
+ }
+ }
+
+ //get the closest Time Expression feature
+ for (Map.Entry<Integer, IdentifiedAnnotation> entry : timeDistMap.entrySet()) {
+ Feature feature = new Feature(this.name, entry.getValue().getCoveredText());
+ features.add(feature);
+ // logger.info("add time feature: "+ entry.getValue().getCoveredText() + entry.getValue().getTimeClass());
+ Feature indicator = new Feature("TimeXNearby", this.name);
+ features.add(indicator);
+ Feature type = new Feature("TimeXType", entry.getValue().getClass());
+ features.add(type);
+
+ //add PP get Heading preposition
+ for(TreebankNode treebankNode : JCasUtil.selectCovering(
+ jCas,
+ TreebankNode.class,
+ entry.getValue().getBegin(),
+ entry.getValue().getEnd())) {
+
+ if(treebankNode.getNodeType().equals("PP")) {
+ Feature PPNodeType = new Feature("Timex_PPNodeType", treebankNode.getNodeType());
+ features.add(PPNodeType);
+ String value = treebankNode.getNodeValue();
+ if(value != null)
+ features.add(new Feature("Timex_PPNodeValue_", value));
+ features.add(new Feature("PP_Timex_", entry.getValue().getClass().getCanonicalName()));
+ break;
+ }
+ }
+
+ break;
+
+ }
+ }
+
+ return features;
+ }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TokenPropertyFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TokenPropertyFeaturesExtractor.java?rev=1632167&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TokenPropertyFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TokenPropertyFeaturesExtractor.java Wed Oct 15 19:29:44 2014
@@ -0,0 +1,126 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.apache.uima.fit.util.JCasUtil;
+
+public class TokenPropertyFeaturesExtractor implements
+RelationFeaturesExtractor {
+
+ @SuppressWarnings("null")
+ @Override
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+ ArrayList<Feature> feats = new ArrayList<>();
+
+ List<BaseToken> arg1Tokens = JCasUtil.selectCovered(jCas, BaseToken.class, arg1);
+ List<BaseToken> arg2Tokens = JCasUtil.selectCovered(jCas, BaseToken.class, arg2);
+
+ int arg1Length = arg1Tokens == null ? 0 : arg1Tokens.size();
+ int arg2Length = arg2Tokens == null ? 0 : arg2Tokens.size();
+
+ //get token size feature:
+ feats.add(new Feature("arg1_tokenSize", arg1Length));
+ feats.add(new Feature("arg2_tokenSize", arg2Length));
+
+ for(BaseToken bt : arg1Tokens){
+ if(bt.getPartOfSpeech().startsWith("VB")){
+ feats.add(new Feature("arg1_contains", "VB"));
+ break;
+ }
+ }
+ for(BaseToken bt : arg2Tokens){
+ if(bt.getPartOfSpeech().startsWith("VB")){
+ feats.add(new Feature("arg2_contains", "VB"));
+ break;
+ }
+ }
+
+ if(arg1Length == 0 || arg2Length == 0){
+ return feats;
+ }
+
+ //check if the last token match:
+ String arg1last = arg1Tokens.get(arg1Length-1).getCoveredText().toLowerCase();
+ String arg2last = arg2Tokens.get(arg2Length-1).getCoveredText().toLowerCase();
+ if(arg1last.equals(arg2last)){
+ feats.add(new Feature("contain_matching_last_token", true));
+ feats.add(new Feature("matching_last_token_", arg1last));
+ }
+
+ //check if contains the same anatomical sites:
+ List<AnatomicalSiteMention> arg1anaSites = JCasUtil.selectCovered(jCas, AnatomicalSiteMention.class, arg1);
+ List<AnatomicalSiteMention> arg2anaSites = JCasUtil.selectCovered(jCas, AnatomicalSiteMention.class, arg2);
+ for(AnatomicalSiteMention siteA : arg1anaSites){
+ for(AnatomicalSiteMention siteB : arg2anaSites){
+ if(siteA.getCoveredText().equalsIgnoreCase(siteB.getCoveredText())){
+ feats.add(new Feature("contain_matching_anatomicalSite", true));
+ feats.add(new Feature("matching_anatomicalSite_", siteA.getCoveredText().toLowerCase()));
+ }
+ }
+ }
+
+ //check if contains the same procedure:
+ List<ProcedureMention> arg1procedure = JCasUtil.selectCovered(jCas, ProcedureMention.class, arg1);
+ List<ProcedureMention> arg2procedure = JCasUtil.selectCovered(jCas, ProcedureMention.class, arg2);
+ for(ProcedureMention proA : arg1procedure){
+ for(ProcedureMention proB : arg2procedure){
+ if(proA.getCoveredText().equalsIgnoreCase(proB.getCoveredText())){
+ feats.add(new Feature("contain_matching_Procedure", true));
+ feats.add(new Feature("matching_Procedure_", proA.getCoveredText().toLowerCase()));
+ }
+ }
+ }
+
+ //check if contains the same Sign and Symptom:
+ List<SignSymptomMention> arg1ss = JCasUtil.selectCovered(jCas, SignSymptomMention.class, arg1);
+ List<SignSymptomMention> arg2ss = JCasUtil.selectCovered(jCas, SignSymptomMention.class, arg2);
+ for(SignSymptomMention ssA : arg1ss){
+ for(SignSymptomMention ssB : arg2ss){
+ if(ssA.getCoveredText().equalsIgnoreCase(ssB.getCoveredText())){
+ feats.add(new Feature("contain_matching_SignSymptom", true));
+ feats.add(new Feature("matching_SignSymptom_", ssA.getCoveredText().toLowerCase()));
+ }
+ }
+ }
+
+ //check if contains the same Disease Disorder:
+ List<DiseaseDisorderMention> arg1dd = JCasUtil.selectCovered(jCas, DiseaseDisorderMention.class, arg1);
+ List<DiseaseDisorderMention> arg2dd = JCasUtil.selectCovered(jCas, DiseaseDisorderMention.class, arg2);
+ for(DiseaseDisorderMention ddA : arg1dd){
+ for(DiseaseDisorderMention ddB : arg2dd){
+ if(ddA.getCoveredText().equalsIgnoreCase(ddB.getCoveredText())){
+ feats.add(new Feature("contain_matching_DiseaseDisorder", true));
+ feats.add(new Feature("matching_DiseaseDisorder_", ddA.getCoveredText().toLowerCase()));
+ }
+ }
+ }
+
+ //check if contains the same Medication:
+ List<MedicationMention> arg1med = JCasUtil.selectCovered(jCas, MedicationMention.class, arg1);
+ List<MedicationMention> arg2med = JCasUtil.selectCovered(jCas, MedicationMention.class, arg2);
+ for(MedicationMention medA : arg1med){
+ for(MedicationMention medB : arg2med){
+ if(medA.getCoveredText().equalsIgnoreCase(medB.getCoveredText())){
+ feats.add(new Feature("contain_matching_Medication", true));
+ feats.add(new Feature("matching_Medication_", medA.getCoveredText().toLowerCase()));
+ }
+ }
+ }
+
+ return feats;
+ }
+
+}