You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/10/30 17:06:02 UTC
svn commit: r1635530 - in /ctakes/trunk:
ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/
ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/
ctakes-assertion-res/src/main/resources...
Author: tmill
Date: Thu Oct 30 16:06:02 2014
New Revision: 1635530
URL: http://svn.apache.org/r1635530
Log:
CTAKES-94: New models and cleaned up code for Generic, History, Conditional analysis engines.
Added:
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/model.jar (with props)
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/model.jar (with props)
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/historyOf/
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/historyOf/model.jar (with props)
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/GenericFeaturesExtractor.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
Added: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/model.jar
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/model.jar?rev=1635530&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/conditional/model.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/model.jar
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/model.jar?rev=1635530&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/generic/model.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/historyOf/model.jar
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/historyOf/model.jar?rev=1635530&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/historyOf/model.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/GenericFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/GenericFeaturesExtractor.java?rev=1635530&r1=1635529&r2=1635530&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/GenericFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/GenericFeaturesExtractor.java Thu Oct 30 16:06:02 2014
@@ -27,8 +27,8 @@ import java.util.List;
import org.apache.ctakes.assertion.attributes.generic.GenericAttributeClassifier;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
@@ -39,13 +39,13 @@ import org.cleartk.ml.feature.extractor.
* @author m081914
*
*/
-public class GenericFeaturesExtractor implements FeatureExtractor1 {
+public class GenericFeaturesExtractor implements FeatureExtractor1<IdentifiedAnnotation> {
@Override
- public List<Feature> extract(JCas jCas, Annotation arg) {
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg) {
- List<Feature> features = new ArrayList<Feature>();
+ List<Feature> features = new ArrayList<>();
// Pull in general dependency-based features -- externalize to another extractor?
ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
@@ -65,10 +65,10 @@ public class GenericFeaturesExtractor im
return features;
}
- private Collection<? extends Feature> hashToFeatureList(
+ private static Collection<? extends Feature> hashToFeatureList(
HashMap<String, Boolean> featsIn) {
- Collection<Feature> featsOut = new HashSet<Feature>();
+ Collection<Feature> featsOut = new HashSet<>();
for (String featName : featsIn.keySet()) {
featsOut.add(new Feature(featName,featsIn.get(featName)));
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java?rev=1635530&r1=1635529&r2=1635530&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java Thu Oct 30 16:06:02 2014
@@ -27,8 +27,8 @@ import java.util.List;
import org.apache.ctakes.assertion.attributes.history.HistoryAttributeClassifier;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
@@ -39,13 +39,13 @@ import org.cleartk.ml.feature.extractor.
* @author shalgrim
*
*/
-public class HistoryFeaturesExtractor implements FeatureExtractor1 {
+public class HistoryFeaturesExtractor implements FeatureExtractor1<IdentifiedAnnotation> {
@Override
- public List<Feature> extract(JCas jCas, Annotation arg) {
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg) {
- List<Feature> features = new ArrayList<Feature>();
+ List<Feature> features = new ArrayList<>();
// Pull in general dependency-based features -- externalize to another extractor?
ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
@@ -71,10 +71,10 @@ public class HistoryFeaturesExtractor im
return features;
}
- private Collection<? extends Feature> hashToFeatureList(
+ private static Collection<? extends Feature> hashToFeatureList(
HashMap<String, Boolean> featsIn) {
- Collection<Feature> featsOut = new HashSet<Feature>();
+ Collection<Feature> featsOut = new HashSet<>();
for (String featName : featsIn.keySet()) {
featsOut.add(new Feature(featName,featsIn.get(featName)));
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1635530&r1=1635529&r2=1635530&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Thu Oct 30 16:06:02 2014
@@ -23,11 +23,15 @@ import java.net.URI;
import org.apache.ctakes.assertion.attributes.features.selection.Chi2FeatureSelection;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
+import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Instance;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
public class ConditionalCleartkAnalysisEngine extends
AssertionCleartkAnalysisEngine {
@@ -35,9 +39,8 @@ public class ConditionalCleartkAnalysisE
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
- probabilityOfKeepingADefaultExample = 0.1;
+ probabilityOfKeepingADefaultExample = 1.0;
initializeFeatureSelection();
-
}
@Override
@@ -45,33 +48,26 @@ public class ConditionalCleartkAnalysisE
Instance<String> instance) throws AnalysisEngineProcessException {
if (this.isTraining())
{
- String conditional = (entityOrEventMention.getConditional()) ? "conditional" : "nonconditional";
+ boolean conditional = entityOrEventMention.getConditional();
// downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
- if ("nonconditional".equals(conditional)
+ if (!conditional
&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
return;
}
- instance.setOutcome(conditional);
-// this.dataWriter.write(instance);
-
+ instance.setOutcome(""+conditional);
} else
{
String label = this.classifier.classify(instance.getFeatures());
boolean conditional = false;
- if (label!= null && label.equals("conditional"))
- {
- conditional = true;
- } else if (label != null && label.equals("nonconditional"))
- {
- conditional = false;
+ if (label!= null){
+ conditional = Boolean.parseBoolean(label);
}
entityOrEventMention.setConditional(conditional);
}
}
public static FeatureSelection<String> createFeatureSelection(double threshold) {
- return new Chi2FeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
- // return new MutualInformationFeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME);
+ return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
}
public static URI createFeatureSelectionURI(File outputDirectoryName) {
@@ -83,16 +79,18 @@ public class ConditionalCleartkAnalysisE
if (featureSelectionThreshold == 0) {
this.featureSelection = null;
} else {
- this.featureSelection = this.createFeatureSelection(this.featureSelectionThreshold);
-
-// if ( (new File(this.featureSelectionURI)).exists() ) {
-// try {
-// this.featureSelection.load(this.featureSelectionURI);
-// } catch (IOException e) {
-// throw new ResourceInitializationException(e);
-// }
-// }
+ this.featureSelection = createFeatureSelection(this.featureSelectionThreshold);
}
}
-
+
+ public static AnalysisEngineDescription createAnnotatorDescription(String modelPath) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription(ConditionalCleartkAnalysisEngine.class,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return createAnnotatorDescription("/org/apache/ctakes/assertion/models/conditional/model.jar");
+ }
+
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1635530&r1=1635529&r2=1635530&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Thu Oct 30 16:06:02 2014
@@ -28,10 +28,12 @@ import org.apache.ctakes.assertion.attri
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Instance;
-import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
public class GenericCleartkAnalysisEngine extends
AssertionCleartkAnalysisEngine {
@@ -41,29 +43,13 @@ public class GenericCleartkAnalysisEngin
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
- probabilityOfKeepingADefaultExample = 0.1;
-
-// if (this.isTraining() && this.goldViewName == null) {
-// throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
-// }
-
-// if (USE_DEFAULT_EXTRACTORS) {
-// super.initialize(context);
-// } else {
- initialize_generic_extractor();
-// }
- initializeFeatureSelection();
+ probabilityOfKeepingADefaultExample = 0.5;
+ initialize_generic_extractor();
+ initializeFeatureSelection();
}
- private void initialize_generic_extractor() throws ResourceInitializationException {
-
-// if (this.contextFeatureExtractors==null) {
-// this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-// }
-// this.contextFeatureExtractors.add(
-// new CleartkExtractor(
-// IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
+ private void initialize_generic_extractor() {
if(this.entityFeatureExtractors == null){
this.entityFeatureExtractors = new ArrayList<>();
}
@@ -76,24 +62,22 @@ public class GenericCleartkAnalysisEngin
Instance<String> instance) throws AnalysisEngineProcessException {
if (this.isTraining())
{
- String generic = entityOrEventMention.getGeneric()? "1":"0";
+ boolean generic = entityOrEventMention.getGeneric();
// downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
- if ("0".equals(generic)
+ if (!generic
&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
return;
}
- instance.setOutcome(generic);
-// this.dataWriter.write(instance);
+ instance.setOutcome(""+generic);
} else
{
String label = this.classifier.classify(instance.getFeatures());
- entityOrEventMention.setGeneric("1".equals(label));
+ entityOrEventMention.setGeneric(Boolean.parseBoolean(label));
}
}
public static FeatureSelection<String> createFeatureSelection(double threshold) {
- return new Chi2FeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
- // return new MutualInformationFeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME);
+ return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
}
public static URI createFeatureSelectionURI(File outputDirectoryName) {
@@ -105,16 +89,18 @@ public class GenericCleartkAnalysisEngin
if (featureSelectionThreshold == 0) {
this.featureSelection = null;
} else {
- this.featureSelection = this.createFeatureSelection(this.featureSelectionThreshold);
-
-// if ( (new File(this.featureSelectionURI)).exists() ) {
-// try {
-// this.featureSelection.load(this.featureSelectionURI);
-// } catch (IOException e) {
-// throw new ResourceInitializationException(e);
-// }
-// }
+ this.featureSelection = createFeatureSelection(this.featureSelectionThreshold);
}
}
-
+
+ public static AnalysisEngineDescription createAnnotatorDescription(String modelPath) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription(GenericCleartkAnalysisEngine.class,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return createAnnotatorDescription("/org/apache/ctakes/assertion/models/generic/model.jar");
+ }
+
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1635530&r1=1635529&r2=1635530&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Thu Oct 30 16:06:02 2014
@@ -29,10 +29,12 @@ import org.apache.ctakes.assertion.medfa
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Instance;
-import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
public class HistoryCleartkAnalysisEngine extends
AssertionCleartkAnalysisEngine {
@@ -42,30 +44,14 @@ public class HistoryCleartkAnalysisEngin
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
- probabilityOfKeepingADefaultExample = 0.1;
+ probabilityOfKeepingADefaultExample = 0.5;
-// if (this.isTraining() && this.goldViewName == null) {
-// throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
-// }
-
-// if (USE_DEFAULT_EXTRACTORS) {
-// super.initialize(context);
-// } else {
- initialize_history_extractor();
-// }
- initializeFeatureSelection();
-
+ initialize_history_extractor();
+ initializeFeatureSelection();
}
- private void initialize_history_extractor() throws ResourceInitializationException {
+ private void initialize_history_extractor() {
-// if (this.contextFeatureExtractors==null) {
-// this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-// }
-// this.contextFeatureExtractors.add(
-// new CleartkExtractor(
-// IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
-//
if(this.entityFeatureExtractors == null){
this.entityFeatureExtractors = new ArrayList<>();
}
@@ -87,7 +73,6 @@ public class HistoryCleartkAnalysisEngin
}
instance.setOutcome(String.valueOf(history));
-// this.dataWriter.write(instance);
} else
{
String label = this.classifier.classify(instance.getFeatures());
@@ -95,8 +80,7 @@ public class HistoryCleartkAnalysisEngin
}
}
public static FeatureSelection<String> createFeatureSelection(double threshold) {
- return new Chi2FeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
- // return new MutualInformationFeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME);
+ return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false);
}
public static URI createFeatureSelectionURI(File outputDirectoryName) {
@@ -108,16 +92,17 @@ public class HistoryCleartkAnalysisEngin
if (featureSelectionThreshold == 0) {
this.featureSelection = null;
} else {
- this.featureSelection = this.createFeatureSelection(this.featureSelectionThreshold);
-
-// if ( (new File(this.featureSelectionURI)).exists() ) {
-// try {
-// this.featureSelection.load(this.featureSelectionURI);
-// } catch (IOException e) {
-// throw new ResourceInitializationException(e);
-// }
-// }
+ this.featureSelection = createFeatureSelection(this.featureSelectionThreshold);
}
}
-
+
+ public static AnalysisEngineDescription createAnnotatorDescription(String modelPath) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription(HistoryCleartkAnalysisEngine.class,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return createAnnotatorDescription("/org/apache/ctakes/assertion/models/historyOf/model.jar");
+ }
}