You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/10/14 22:59:25 UTC
svn commit: r1631884 -
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
Author: tmill
Date: Tue Oct 14 20:59:25 2014
New Revision: 1631884
URL: http://svn.apache.org/r1631884
Log:
Cleanup uncertainty AE, static accessors to uncertainty analysis engine, use constants.
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java?rev=1631884&r1=1631883&r2=1631884&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java Tue Oct 14 20:59:25 2014
@@ -18,32 +18,67 @@
*/
package org.apache.ctakes.assertion.medfacts.cleartk;
+import static org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG.PTK;
+import static org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG.PTK_FRAGS;
+import static org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG.STK;
+import static org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG.STK_FRAGS;
+
import java.io.File;
+import java.io.FileNotFoundException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.ctakes.assertion.attributes.features.selection.Chi2FeatureSelection;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.AboveLeftFragmentExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.AssertionAboveLeftTreeExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.AssertionDependencyTreeExtractor;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.DependencyPathRegexpFeatureExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.DependencyWordsFragmentExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.UncertaintyFeatureExtractor;
+import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Instance;
-import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
public class UncertaintyCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
- probabilityOfKeepingADefaultExample = 0.1;
+ probabilityOfKeepingADefaultExample = 0.5;
if(this.entityFeatureExtractors == null){
- this.entityFeatureExtractors = new ArrayList<FeatureExtractor1>();
+ this.entityFeatureExtractors = new ArrayList<>();
}
this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/uncertainty.txt"));
- this.entityFeatureExtractors.add(new AboveLeftFragmentExtractor("ALUncertainty", "org/apache/ctakes/assertion/models/sharpUncertaintyFrags.txt"));
+ this.entityFeatureExtractors.add(new UncertaintyFeatureExtractor());
+ try {
+ this.entityFeatureExtractors.add(new DependencyPathRegexpFeatureExtractor());
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ throw new ResourceInitializationException(e);
+ }
+
+ if(featConfig == STK_FRAGS){
+ this.entityFeatureExtractors.add(new AboveLeftFragmentExtractor("AL_Unc", "org/apache/ctakes/assertion/models/jbi_paper_unc_seed_frags.txt"));
+ }
+
+ if(featConfig == PTK_FRAGS){
+ this.entityFeatureExtractors.add(new DependencyWordsFragmentExtractor("DW_Uncertainty", "org/apache/ctakes/assertion/models/jbi_paper_uncertainty_dw_frags.txt"));
+ }
+ if(featConfig == STK){
+ this.entityTreeExtractors.add(new AssertionAboveLeftTreeExtractor());
+ }
+
+ if(featConfig == PTK){
+ this.entityTreeExtractors.add(new AssertionDependencyTreeExtractor());
+ }
initializeFeatureSelection();
@@ -53,7 +88,7 @@ public class UncertaintyCleartkAnalysisE
public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String> instance) throws AnalysisEngineProcessException {
if (this.isTraining())
{
- String uncertainty = (entityOrEventMention.getUncertainty() == 1) ? "uncertain" : "certain";
+ String uncertainty = (entityOrEventMention.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT) ? "uncertain" : "certain";
// downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
if ("certain".equals(uncertainty)
@@ -68,17 +103,17 @@ public class UncertaintyCleartkAnalysisE
int uncertainty = 0;
if (label!= null && label.equals("uncertain"))
{
- uncertainty = 1;
+ uncertainty = CONST.NE_UNCERTAINTY_PRESENT;
} else if (label != null && label.equals("certain"))
{
- uncertainty = 0;
+ uncertainty = CONST.NE_UNCERTAINTY_ABSENT;
}
entityOrEventMention.setUncertainty(uncertainty);
}
}
public static FeatureSelection<String> createFeatureSelection(double threshold) {
- return new Chi2FeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
+ return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
// return new MutualInformationFeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME);
}
@@ -91,7 +126,7 @@ public class UncertaintyCleartkAnalysisE
if (featureSelectionThreshold == 0) {
this.featureSelection = null;
} else {
- this.featureSelection = this.createFeatureSelection(this.featureSelectionThreshold);
+ this.featureSelection = createFeatureSelection(this.featureSelectionThreshold);
// if ( (new File(this.featureSelectionURI)).exists() ) {
// try {
@@ -103,5 +138,15 @@ public class UncertaintyCleartkAnalysisE
}
}
-
+ public static AnalysisEngineDescription createAnnotatorDescription(String modelPath) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription(UncertaintyCleartkAnalysisEngine.class,
+ AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
+ AssertionCleartkAnalysisEngine.FEATURE_CONFIG.DEP_REGEX,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return createAnnotatorDescription("/org/apache/ctakes/assertion/models/uncertainty/model.jar");
+ }
}