You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/01/25 22:30:03 UTC
svn commit: r1438724 - /incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/
Author: swu
Date: Fri Jan 25 21:30:02 2013
New Revision: 1438724
URL: http://svn.apache.org/viewvc?rev=1438724&view=rev
Log:
Make the assertion analysis engines capable of downsampling default-label examples during training.
Modified:
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -233,7 +233,7 @@ public abstract class AssertionCleartkAn
DocumentID documentId = JCasUtil.selectSingle(jCas, DocumentID.class);
if (documentId != null)
{
- logger.info("processing next doc: " + documentId.getDocumentID());
+ logger.debug("processing next doc: " + documentId.getDocumentID());
} else
{
logger.warn("processing next doc (doc id is null)");
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -18,7 +18,9 @@
*/
package org.apache.ctakes.assertion.medfacts.cleartk;
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.Instance;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -27,13 +29,26 @@ public class ConditionalCleartkAnalysisE
AssertionCleartkAnalysisEngine {
@Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ probabilityOfKeepingADefaultExample = 1.0;
+ }
+
+ @Override
public void setClassLabel(IdentifiedAnnotation entityMention,
Instance<String> instance) throws AnalysisEngineProcessException {
if (this.isTraining())
{
String conditional = (entityMention.getConditional()) ? "conditional" : "nonconditional";
+
+ // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+ if ("nonconditional".equals(conditional)
+ && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+ return;
+ }
instance.setOutcome(conditional);
this.dataWriter.write(instance);
+
} else
{
String label = this.classifier.classify(instance.getFeatures());
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -36,7 +36,8 @@ public class GenericCleartkAnalysisEngin
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
-
+ probabilityOfKeepingADefaultExample = 1.0;
+
// if (this.isTraining() && this.goldViewName == null) {
// throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
// }
@@ -66,6 +67,12 @@ public class GenericCleartkAnalysisEngin
if (this.isTraining())
{
String generic = entityMention.getGeneric()? "1":"0";
+
+ // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+ if ("0".equals(generic)
+ && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+ return;
+ }
instance.setOutcome(generic);
this.dataWriter.write(instance);
} else
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -28,11 +28,16 @@ public class PolarityCleartkAnalysisEngi
if (this.isTraining())
{
String polarity = (entityMention.getPolarity() == -1) ? "negated" : "present";
- instance.setOutcome(polarity);
+ // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
if ("negated".equals(polarity))
{
logger.debug("TRAINING: " + polarity);
}
+ if ("present".equals(polarity)
+ && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+ return;
+ }
+ instance.setOutcome(polarity);
this.dataWriter.write(instance);
} else
{
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -51,6 +51,7 @@ public class SubjectCleartkAnalysisEngin
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
+ probabilityOfKeepingADefaultExample = 1.0;
if (this.isTraining() && this.goldViewName == null) {
throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
@@ -81,13 +82,19 @@ public class SubjectCleartkAnalysisEngin
if (this.isTraining())
{
String subj = entityMention.getSubject();
+
+ // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+ if ("patient".equals(subj)
+ && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+ return;
+ }
instance.setOutcome(subj);
+ this.dataWriter.write(instance);
logger.log(Level.DEBUG, String.format("[%s] expected: ''; actual: ''; features: %s",
this.getClass().getSimpleName(),
instance.toString()
//StringUtils.join(instance.getFeatures(), ", ")
));
- this.dataWriter.write(instance);
} else
{
String label = this.classifier.classify(instance.getFeatures());
Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java?rev=1438724&r1=1438723&r2=1438724&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java Fri Jan 25 21:30:02 2013
@@ -18,7 +18,9 @@
*/
package org.apache.ctakes.assertion.medfacts.cleartk;
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.Instance;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -26,10 +28,22 @@ import org.apache.ctakes.typesystem.type
public class UncertaintyCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
@Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ probabilityOfKeepingADefaultExample = 1.0;
+ }
+
+ @Override
public void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException {
if (this.isTraining())
{
String uncertainty = (entityMention.getUncertainty() == 1) ? "uncertain" : "certain";
+
+ // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+ if ("certain".equals(uncertainty)
+ && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+ return;
+ }
instance.setOutcome(uncertainty);
this.dataWriter.write(instance);
} else