You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/01/15 20:29:52 UTC

svn commit: r1433595 - /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/

Author: dligach
Date: Tue Jan 15 19:29:51 2013
New Revision: 1433595

URL: http://svn.apache.org/viewvc?rev=1433595&view=rev
Log:
Implemented baseline 3 for degree_of

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java?rev=1433595&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java Tue Jan 15 19:29:51 2013
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae.baselines;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.CleartkProcessingException;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Baseline 3 for degree_of: proposes an (entity mention, modifier) pair as a relation
+ * candidate whenever both annotations are enclosed within the same noun phrase.
+ */
+public class Baseline3DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator {
+
+	@Override
+	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas identifiedAnnotationView, Sentence sentence) {
+		
+		List<EntityMention> entities = JCasUtil.selectCovered(identifiedAnnotationView, EntityMention.class, sentence);
+		List<Modifier> modifiers = JCasUtil.selectCovered(identifiedAnnotationView, Modifier.class, sentence);
+		
+		List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
+		for (EntityMention entity : entities) {
+			for (Modifier modifier : modifiers) {
+				pairs.add(new IdentifiedAnnotationPair(entity, modifier));
+			}
+		}
+		
+    // keep only type-valid (entity, modifier) pairs whose two arguments both fall inside one noun phrase
+    List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
+    for(IdentifiedAnnotationPair pair : pairs) {
+      if(Utils.validateDegreeOfArgumentTypes(pair)) {
+        for(TreebankNode nounPhrase : Utils.getNounPhrases(identifiedAnnotationView, sentence)) {
+          if(Utils.isEnclosed(pair, nounPhrase)) {
+            IdentifiedAnnotation arg1 = pair.getArg1();
+            IdentifiedAnnotation arg2 = pair.getArg2();
+            result.add(new IdentifiedAnnotationPair(arg1, arg2));
+            System.out.println("NP: " + nounPhrase.getCoveredText() + ", " + nounPhrase.getBegin() + ", " + nounPhrase.getEnd());
+            System.out.println("arg1: " + arg1.getCoveredText() + ", " + arg1.getBegin() + ", " + arg1.getEnd());
+            System.out.println("arg2: " + arg2.getCoveredText() + ", " + arg2.getBegin() + ", " + arg2.getEnd());
+            System.out.println();
+            break; // one enclosing NP is enough; stopping here adds the pair to the result only once
+          }
+        }
+      }
+    }
+    
+    return result;
+	}
+
+	@Override
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+		BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+		return (relation != null) ? relation.getCategory() : NO_RELATION_CATEGORY;
+	}
+
+  @Override
+  public String classify(List<Feature> features) throws CleartkProcessingException {
+    return this.classifier.classify(features);
+  }
+}

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java?rev=1433595&r1=1433594&r2=1433595&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java Tue Jan 15 19:29:51 2013
@@ -29,8 +29,6 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.classifier.Feature;
@@ -84,8 +82,8 @@ public class Baseline3EntityMentionPairR
 		List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
 		for(IdentifiedAnnotationPair pair : pairs) {
 		  if(Utils.validateLocationOfArgumentTypes(pair)) {
-		    for(TreebankNode nounPhrase : getNounPhrases(identifiedAnnotationView, sentence)) {
-		      if(isEnclosed(pair, nounPhrase)) {
+		    for(TreebankNode nounPhrase : Utils.getNounPhrases(identifiedAnnotationView, sentence)) {
+		      if(Utils.isEnclosed(pair, nounPhrase)) {
 		        IdentifiedAnnotation arg1 = pair.getArg1();
 		        IdentifiedAnnotation arg2 = pair.getArg2();
 		        result.add(new IdentifiedAnnotationPair(arg1, arg2));
@@ -101,51 +99,7 @@ public class Baseline3EntityMentionPairR
 		
 		return result;
 	}
-	
-	/*
-	 * Is this pair of entities enclosed inside a noun phrase?
-	 */
-	boolean isEnclosed(IdentifiedAnnotationPair pair, TreebankNode np) {
-	  
-    IdentifiedAnnotation arg1 = pair.getArg1();
-    IdentifiedAnnotation arg2 = pair.getArg2();
-
-    if((np.getBegin() <= arg1.getBegin()) &&
-        (np.getEnd() >= arg1.getEnd()) &&
-        (np.getBegin() <= arg2.getBegin()) &&
-        (np.getEnd() >= arg2.getEnd())) {
-      return true;
-    }
-    
-    return false;
-	}
-	
-	/**
-	 * Get all noun phrases in a sentence.
-	 */
-	List<TreebankNode> getNounPhrases(JCas identifiedAnnotationView, Sentence sentence) {
-	  
-	  List<TreebankNode> nounPhrases = new ArrayList<TreebankNode>();
-	  List<TreebankNode> treebankNodes;
-	  try {
-      treebankNodes = JCasUtil.selectCovered(
-          identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA), 
-          TreebankNode.class,
-          sentence);
-    } catch (CASException e) {
-      treebankNodes = new ArrayList<TreebankNode>();
-      System.out.println("couldn't get default sofa");
-    }
-	  
-	  for(TreebankNode treebankNode : treebankNodes) {
-	    if(treebankNode.getNodeType().equals("NP")) {
-	      nounPhrases.add(treebankNode);
-	    }
-	  }
-	  
-	  return nounPhrases;	  
-	}
-	
+		
 	@Override
 	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
 			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java?rev=1433595&r1=1433594&r2=1433595&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java Tue Jan 15 19:29:51 2013
@@ -1,12 +1,17 @@
 package org.apache.ctakes.relationextractor.ae.baselines;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.uimafit.util.JCasUtil;
 
@@ -67,4 +72,48 @@ public class Utils {
     List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, pair.getArg1(), pair.getArg2());
     return baseTokens.size();
   }
+  
+  /**
+   * Returns true when the spans of both arguments of the pair lie within the span of np (boundaries inclusive).
+   */
+  public static boolean isEnclosed(IdentifiedAnnotationPair pair, TreebankNode np) {
+    
+    IdentifiedAnnotation arg1 = pair.getArg1();
+    IdentifiedAnnotation arg2 = pair.getArg2();
+
+    if((np.getBegin() <= arg1.getBegin()) &&
+        (np.getEnd() >= arg1.getEnd()) &&
+        (np.getBegin() <= arg2.getBegin()) &&
+        (np.getEnd() >= arg2.getEnd())) {
+      return true;
+    }
+    
+    return false;
+  }
+  
+  /**
+   * Get the TreebankNodes of node type "NP" covered by the sentence in the default sofa view (none if that view is unavailable).
+   */
+  public static List<TreebankNode> getNounPhrases(JCas identifiedAnnotationView, Sentence sentence) {
+    
+    List<TreebankNode> nounPhrases = new ArrayList<TreebankNode>();
+    List<TreebankNode> treebankNodes;
+    try {
+      treebankNodes = JCasUtil.selectCovered(
+          identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA), 
+          TreebankNode.class,
+          sentence);
+    } catch (CASException e) {
+      treebankNodes = new ArrayList<TreebankNode>();
+      System.out.println("couldn't get default sofa");
+    }
+    
+    for(TreebankNode treebankNode : treebankNodes) {
+      if(treebankNode.getNodeType().equals("NP")) {
+        nounPhrases.add(treebankNode);
+      }
+    }
+    
+    return nounPhrases;   
+  }
 }