You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/01/14 15:12:43 UTC

svn commit: r1432925 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java

Author: dligach
Date: Mon Jan 14 14:12:43 2013
New Revision: 1432925

URL: http://svn.apache.org/viewvc?rev=1432925&view=rev
Log:
Initial version of baseline 3

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java
Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java?rev=1432925&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java Mon Jan 14 14:12:43 2013
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Baseline 3 for the location_of relation (initial, exploratory version).
+ * <p>
+ * For every anatomical site mention in a sentence, the children of each NP
+ * treebank node covering that mention are printed to standard output. No
+ * candidate argument pairs are proposed yet, and {@link #classify(List)}
+ * always answers "location_of".
+ * <p>
+ * NOTE(review): this class was described as assuming classifyBothDirections is
+ * true, but the parameter declared below defaults to false -- confirm intent.
+ */
+public class Baseline3EntityMentionPairRelationExtractorAnnotator extends RelationExtractorAnnotator {
+
+  public static final String PARAM_CLASSIFY_BOTH_DIRECTIONS = "ClassifyBothDirections";
+
+  // Entity type ids referenced by the location_of argument constraints.
+  private static final int TYPE_ID_DISORDER = 2;
+  private static final int TYPE_ID_SIGN_SYMPTOM = 3;
+  private static final int TYPE_ID_PROCEDURE = 5;
+  private static final int TYPE_ID_ANATOMICAL_SITE = 6;
+
+  // Allowable arg2 types for location_of; built once instead of on every call.
+  private static final HashSet<Integer> LOCATION_OF_ARG2_TYPE_IDS = new HashSet<Integer>(
+      Arrays.asList(TYPE_ID_DISORDER, TYPE_ID_SIGN_SYMPTOM, TYPE_ID_PROCEDURE));
+
+  // Treebank label of the phrases inspected around each anatomical site.
+  private static final String NOUN_PHRASE_LABEL = "NP";
+
+  @ConfigurationParameter(
+      name = PARAM_CLASSIFY_BOTH_DIRECTIONS,
+      mandatory = false,
+      description = "run the classifier in both directions, that is, classify each pair of events "
+          + "{X,Y} once in the order X-to-Y and once in the order Y-to-X (default: classify each "
+          + "pair of events {X, Y} once, giving the label 'R' if a relation exists with the order "
+          + "X-to-Y, and 'R-1' if a relation exists with the order Y-to-X)")
+  protected boolean classifyBothDirections = false;
+
+  /**
+   * Inspects the parse context of the anatomical sites found in a sentence.
+   * <p>
+   * The children of every NP treebank node covering an anatomical site are
+   * printed to standard output (diagnostics only). No candidate pairs are
+   * generated, so the returned list is always empty.
+   *
+   * @param identifiedAnnotationView view holding the entity mentions
+   * @param sentence sentence whose entity mentions are examined
+   * @return a new, empty list
+   */
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas identifiedAnnotationView, Sentence sentence) {
+
+    List<IdentifiedAnnotationPair> noPairs = new ArrayList<IdentifiedAnnotationPair>();
+
+    List<EntityMention> anatomicalSites = new ArrayList<EntityMention>();
+    for (EntityMention entityMention : JCasUtil.selectCovered(
+        identifiedAnnotationView, EntityMention.class, sentence)) {
+      if (entityMention.getTypeID() == TYPE_ID_ANATOMICAL_SITE) {
+        anatomicalSites.add(entityMention);
+      }
+    }
+    if (anatomicalSites.isEmpty()) {
+      return noPairs;
+    }
+
+    // The view does not depend on the anatomical site, so look it up once; a
+    // failure is then reported once instead of once per mention.
+    JCas treebankView;
+    try {
+      treebankView = identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA);
+    } catch (CASException e) {
+      // best effort: the output below is diagnostic only, so report and move on
+      e.printStackTrace();
+      return noPairs;
+    }
+
+    for (EntityMention anatomicalSite : anatomicalSites) {
+      printNounPhraseChildren(treebankView, sentence, anatomicalSite);
+    }
+
+    // no candidate pairs are proposed yet
+    return noPairs;
+  }
+
+  /**
+   * Prints the children of every NP treebank node covering an anatomical site.
+   *
+   * @param treebankView view searched for covering treebank nodes
+   * @param sentence sentence containing the mention; its text is echoed
+   * @param anatomicalSite mention whose covering nodes are inspected
+   */
+  private static void printNounPhraseChildren(
+      JCas treebankView, Sentence sentence, EntityMention anatomicalSite) {
+    List<TreebankNode> coveringNodes = JCasUtil.selectCovering(
+        treebankView, TreebankNode.class, anatomicalSite.getBegin(), anatomicalSite.getEnd());
+    for (TreebankNode coveringNode : coveringNodes) {
+      // constant-first comparison tolerates a node whose type is unset
+      if (!NOUN_PHRASE_LABEL.equals(coveringNode.getNodeType())) {
+        continue;
+      }
+      FSArray children = coveringNode.getChildren();
+      if (children == null) {
+        System.out.println("NULL");
+        continue;
+      }
+      for (FeatureStructure child : children.toArray()) {
+        TreebankNode childNode = (TreebankNode) child;
+        System.out.println(sentence.getCoveredText());
+        System.out.println("anatomical site: " + anatomicalSite.getCoveredText());
+        System.out.println("child node: " + childNode.getCoveredText());
+        System.out.println();
+      }
+    }
+  }
+
+  /**
+   * Are entity types of the arguments valid for location_of?
+   * The following combinations are allowed:
+   *
+   * location-of(anatomical site/6, disorder/2)
+   * location-of(anatomical site/6, sign/symptom/3)
+   * location-of(anatomical site/6, procedure/5)
+   *
+   * @param pair candidate arguments to check
+   * @return true if arg1 is an anatomical site and arg2 has an allowed type
+   */
+  private static boolean validateArgumentTypes(IdentifiedAnnotationPair pair) {
+    IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site)
+    IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (disorder, sign/symptom, or procedure)
+    return arg1.getTypeID() == TYPE_ID_ANATOMICAL_SITE
+        && LOCATION_OF_ARG2_TYPE_IDS.contains(arg2.getTypeID());
+  }
+
+  /**
+   * Looks up the category for an ordered pair of arguments.
+   *
+   * @param relationLookup relations keyed by their ordered argument list
+   * @param arg1 first argument
+   * @param arg2 second argument
+   * @return the category of the relation found for (arg1, arg2); that category
+   *         with "-1" appended when only the reverse order matches in
+   *         single-direction mode; NO_RELATION_CATEGORY for a negative example
+   *         that is kept; or null for a negative example that is dropped
+   */
+  @Override
+  protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
+      IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+
+    BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+    if (relation != null) {
+      return relation.getCategory();
+    }
+
+    // When classifying in a single direction, {X, Y} is seen only once, so the
+    // reverse order is looked up too and marked in the category name. When
+    // classifying both directions, the reverse pair is seen separately.
+    if (!this.classifyBothDirections) {
+      BinaryTextRelation reversed = relationLookup.get(Arrays.asList(arg2, arg1));
+      if (reversed != null) {
+        return reversed.getCategory() + "-1";
+      }
+    }
+
+    // no relation found: the negative example is kept only when the draw falls
+    // within the configured probability
+    boolean keep = coin.nextDouble() <= this.probabilityOfKeepingANegativeExample;
+    return keep ? NO_RELATION_CATEGORY : null;
+  }
+
+  /**
+   * Always predicts location_of, whatever the features are.
+   *
+   * @param features ignored
+   * @return "location_of"
+   */
+  @Override
+  public String classify(List<Feature> features) {
+    return "location_of";
+  }
+}

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1432925&r1=1432924&r2=1432925&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Mon Jan 14 14:12:43 2013
@@ -28,6 +28,15 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.ctakes.relationextractor.ae.Baseline3EntityMentionPairRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -72,15 +81,6 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 import com.google.common.collect.Sets;
 
-import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-
 public class RelationExtractorEvaluation extends Evaluation_ImplBase<File, AnnotationStatistics<String>> {
 
   public static class Options extends Options_ImplBase {
@@ -138,7 +138,7 @@ public class RelationExtractorEvaluation
   
   // parameter settings currently optimized for SHARP data
   private static final ParameterSettings BEST_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 0.5f, "radial basis function", 10.0, 0.0010);
-  private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 1.0f, "radial basis function", 10.0, 0.01);
+  private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(true, 1.0f, "radial basis function", 10.0, 0.01);
   
   public static void main(String[] args) throws Exception {
     Options options = new Options();
@@ -163,7 +163,7 @@ public class RelationExtractorEvaluation
       boolean isDegreeOf = relationCategory.equals("degree_of");
       Class<? extends RelationExtractorAnnotator> annotatorClass = isDegreeOf
           ? DegreeOfRelationExtractorAnnotator.class
-          : EntityMentionPairRelationExtractorAnnotator.class;
+          : Baseline3EntityMentionPairRelationExtractorAnnotator.class;
 
       // determine the type of classifier to be trained
       Class<? extends DataWriter<String>> dataWriterClass = LIBSVMStringOutcomeDataWriter.class;