You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/01/25 00:26:50 UTC

svn commit: r1438262 - in /incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk: AssertionCleartkAnalysisEngine.java PolarityCleartkAnalysisEngine.java SubjectCleartkAnalysisEngine.java

Author: swu
Date: Thu Jan 24 23:26:50 2013
New Revision: 1438262

URL: http://svn.apache.org/viewvc?rev=1438262&view=rev
Log:
Some cosmetic changes, and added the downsampling field (not yet implemented).

Modified:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1438262&r1=1438261&r2=1438262&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Thu Jan 24 23:26:50 2013
@@ -24,6 +24,7 @@ import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 
 import org.apache.uima.jcas.tcas.Annotation;
 
@@ -101,6 +102,17 @@ public abstract class AssertionCleartkAn
      defaultValue = "false")
   boolean printErrors;
   
+  public static final String PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE = "ProbabilityOfKeepingADefaultExample";
+
+  @ConfigurationParameter(
+      name = PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE,
+      mandatory = false,
+      description = "probability that a default example should be retained for training")
+  protected double probabilityOfKeepingADefaultExample = 1.0;
+  
+  protected Random coin = new Random(0);
+
+  
   public ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
 		
 	    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
@@ -203,7 +215,7 @@ public abstract class AssertionCleartkAn
       logger.info("processing next doc: " + documentId.getDocumentID());
     } else
     {
-      logger.info("processing next doc (doc id is null)");
+      logger.warn("processing next doc (doc id is null)");
     }
 //    // get gold standard relation instances during testing for error analysis
 //    if (! this.isTraining() && printErrors) {
@@ -217,20 +229,20 @@ public abstract class AssertionCleartkAn
 //      //categoryLookup = createCategoryLookup(goldView); 
 //    }
     
-    JCas identifiedAnnotationView, relationView;
+    JCas identifiedAnnotationView;
     if (this.isTraining()) {
       try {
-        identifiedAnnotationView = relationView = jCas.getView(this.goldViewName);
+        identifiedAnnotationView = jCas.getView(this.goldViewName);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
     } else {
-      identifiedAnnotationView = relationView = jCas;
+      identifiedAnnotationView = jCas;
     }
 
 
-    Map<IdentifiedAnnotation, Collection<Sentence>> coveringSentenceMap = JCasUtil.indexCovering(identifiedAnnotationView, IdentifiedAnnotation.class, Sentence.class);
-    Map<Sentence, Collection<BaseToken>> tokensCoveredInSentenceMap = JCasUtil.indexCovered(identifiedAnnotationView, Sentence.class, BaseToken.class);
+//    Map<IdentifiedAnnotation, Collection<Sentence>> coveringSentenceMap = JCasUtil.indexCovering(identifiedAnnotationView, IdentifiedAnnotation.class, Sentence.class);
+//    Map<Sentence, Collection<BaseToken>> tokensCoveredInSentenceMap = JCasUtil.indexCovered(identifiedAnnotationView, Sentence.class, BaseToken.class);
 
     List<Instance<String>> instances = new ArrayList<Instance<String>>();
     // generate a list of training instances for each sentence in the document
@@ -243,7 +255,7 @@ public abstract class AssertionCleartkAn
       }
       if (entityMention.getPolarity() == -1)
       {
-        logger.info(String.format(" - identified annotation: [%d-%d] polarity %d (%s)",
+        logger.debug(String.format(" - identified annotation: [%d-%d] polarity %d (%s)",
             entityMention.getBegin(),
             entityMention.getEnd(),
             entityMention.getPolarity(),
@@ -255,6 +267,8 @@ public abstract class AssertionCleartkAn
 //      instance.addAll(tokenFeatureExtractor.extract(jCas, entityMention));
 
       // extract all features that require the token and sentence annotations
+
+      /*** Commented by SWU 01/24/13 -- doesn't seem to be used
       Collection<Sentence> sentenceList = coveringSentenceMap.get(entityMention);
       Sentence sentence = null;
       if (sentenceList == null || sentenceList.isEmpty())
@@ -273,6 +287,7 @@ public abstract class AssertionCleartkAn
       {
         sentence = sentenceList.iterator().next();
       }
+      */
       //Sentence sentence = sentenceList.iterator().next();
       
       /*

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1438262&r1=1438261&r2=1438262&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java Thu Jan 24 23:26:50 2013
@@ -31,7 +31,7 @@ public class PolarityCleartkAnalysisEngi
 	        instance.setOutcome(polarity);
 	        if ("negated".equals(polarity))
 	        {
-	          logger.info("TRAINING: " + polarity);
+	          logger.debug("TRAINING: " + polarity);
 	        }
 	        this.dataWriter.write(instance);
 	      } else
@@ -40,7 +40,7 @@ public class PolarityCleartkAnalysisEngi
 	        int polarity = 1;
 	        if (label!= null && label.equals("present"))
 	        {
-	          polarity = 0;
+	          polarity = 1;
 	        } else if (label != null && label.equals("negated"))
 	        {
 	          polarity = -1;
@@ -48,7 +48,7 @@ public class PolarityCleartkAnalysisEngi
 	        entityMention.setPolarity(polarity);
 	        if ("negated".equals(label))
 	        {
-	          logger.info(String.format("DECODING/EVAL: %s//%s [%d-%d] (%s)", label, polarity, entityMention.getBegin(), entityMention.getEnd(), entityMention.getClass().getName()));
+	          logger.debug(String.format("DECODING/EVAL: %s//%s [%d-%d] (%s)", label, polarity, entityMention.getBegin(), entityMention.getEnd(), entityMention.getClass().getName()));
 	        }
 	      }
 	}

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1438262&r1=1438261&r2=1438262&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Thu Jan 24 23:26:50 2013
@@ -82,7 +82,7 @@ public class SubjectCleartkAnalysisEngin
 	      {
 	        String subj = entityMention.getSubject();
 	        instance.setOutcome(subj);
-	        logger.log(Level.INFO,  String.format("[%s] expected: ''; actual: ''; features: %s",
+	        logger.log(Level.DEBUG,  String.format("[%s] expected: ''; actual: ''; features: %s",
 		      		  this.getClass().getSimpleName(),
 		      		  instance.toString()
 		      		  //StringUtils.join(instance.getFeatures(), ", ")
@@ -92,7 +92,7 @@ public class SubjectCleartkAnalysisEngin
 	      {
 	        String label = this.classifier.classify(instance.getFeatures());
 	        entityMention.setSubject(label);
-	        logger.log(Level.INFO, "SUBJECT is being set on an IdentifiedAnnotation: "+label+" "+entityMention.getSubject());
+	        logger.log(Level.DEBUG, "SUBJECT is being set on an IdentifiedAnnotation: "+label+" "+entityMention.getSubject());
 	      }
 	}