You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/01/25 22:30:03 UTC

svn commit: r1438724 - /incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/

Author: swu
Date: Fri Jan 25 21:30:02 2013
New Revision: 1438724

URL: http://svn.apache.org/viewvc?rev=1438724&view=rev
Log:
make assertion analysis engines capable of downsampling

Modified:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -233,7 +233,7 @@ public abstract class AssertionCleartkAn
     DocumentID documentId = JCasUtil.selectSingle(jCas, DocumentID.class);
     if (documentId != null)
     {
-      logger.info("processing next doc: " + documentId.getDocumentID());
+      logger.debug("processing next doc: " + documentId.getDocumentID());
     } else
     {
       logger.warn("processing next doc (doc id is null)");

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -18,7 +18,9 @@
  */
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.Instance;
 
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -27,13 +29,26 @@ public class ConditionalCleartkAnalysisE
 		AssertionCleartkAnalysisEngine {
 
 	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+		probabilityOfKeepingADefaultExample = 1.0;
+	}
+	
+	@Override
 	public void setClassLabel(IdentifiedAnnotation entityMention,
 			Instance<String> instance) throws AnalysisEngineProcessException {
 		if (this.isTraining())
 	      {
 	        String conditional = (entityMention.getConditional()) ? "conditional" : "nonconditional";
+	        
+	        // Downsampling: skip a "nonconditional" (default-class) training example when the coin draw is >= probabilityOfKeepingADefaultExample; set that field to 1.0 for no downsampling.
+	        if ("nonconditional".equals(conditional) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
 	        instance.setOutcome(conditional);
 	        this.dataWriter.write(instance);
+
 	      } else
 	      {
 	        String label = this.classifier.classify(instance.getFeatures());

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -36,7 +36,8 @@ public class GenericCleartkAnalysisEngin
 	@Override
 	public void initialize(UimaContext context) throws ResourceInitializationException {
 		super.initialize(context);
-		
+		probabilityOfKeepingADefaultExample = 1.0;
+	
 //		if (this.isTraining() && this.goldViewName == null) {
 //			throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
 //		}
@@ -66,6 +67,12 @@ public class GenericCleartkAnalysisEngin
 		if (this.isTraining())
 	      {
 	        String generic = entityMention.getGeneric()? "1":"0";
+
+	        // Downsampling: skip a "0" (non-generic, default-class) training example when the coin draw is >= probabilityOfKeepingADefaultExample; set that field to 1.0 for no downsampling.
+	        if ("0".equals(generic) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
 	        instance.setOutcome(generic);
 	        this.dataWriter.write(instance);
 	      } else

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -28,11 +28,16 @@ public class PolarityCleartkAnalysisEngi
 	      if (this.isTraining())
 	      {
 	        String polarity = (entityMention.getPolarity() == -1) ? "negated" : "present";
-	        instance.setOutcome(polarity);
+	        // Downsampling of "present" (default-class) examples is done by the check after the debug log below; set probabilityOfKeepingADefaultExample to 1.0 for no downsampling.
 	        if ("negated".equals(polarity))
 	        {
 	          logger.debug("TRAINING: " + polarity);
 	        }
+	        if ("present".equals(polarity) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
+	        instance.setOutcome(polarity);
 	        this.dataWriter.write(instance);
 	      } else
 	      {

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -51,6 +51,7 @@ public class SubjectCleartkAnalysisEngin
 	@Override
 	public void initialize(UimaContext context) throws ResourceInitializationException {
 		super.initialize(context);
+		probabilityOfKeepingADefaultExample = 1.0;
 
 		if (this.isTraining() && this.goldViewName == null) {
 			throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
@@ -81,13 +82,19 @@ public class SubjectCleartkAnalysisEngin
 		if (this.isTraining())
 	      {
 	        String subj = entityMention.getSubject();
+	        
+	        // Downsampling: skip a "patient" (default-class) training example when the coin draw is >= probabilityOfKeepingADefaultExample; set that field to 1.0 for no downsampling.
+	        if ("patient".equals(subj) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
 	        instance.setOutcome(subj);
+	        this.dataWriter.write(instance);
 	        logger.log(Level.DEBUG,  String.format("[%s] expected: ''; actual: ''; features: %s",
 		      		  this.getClass().getSimpleName(),
 		      		  instance.toString()
 		      		  //StringUtils.join(instance.getFeatures(), ", ")
 		      		  ));
-	        this.dataWriter.write(instance);
 	      } else
 	      {
 	        String label = this.classifier.classify(instance.getFeatures());

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -18,7 +18,9 @@
  */
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.Instance;
 
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -26,10 +28,22 @@ import org.apache.ctakes.typesystem.type
 public class UncertaintyCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
 
 	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+		probabilityOfKeepingADefaultExample = 1.0;
+	}
+	
+	@Override
 	public void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException {
 		if (this.isTraining())
 	      {
 	        String uncertainty = (entityMention.getUncertainty() == 1) ? "uncertain" : "certain";
+
+	        // Downsampling: skip a "certain" (default-class) training example when the coin draw is >= probabilityOfKeepingADefaultExample; set that field to 1.0 for no downsampling.
+	        if ("certain".equals(uncertainty) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
 	        instance.setOutcome(uncertainty);
 	        this.dataWriter.write(instance);
 	      } else