You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/02/07 21:46:36 UTC
svn commit: r1565808 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/AssertionEvaluation.java
medfacts/cleartk/AssertionCleartkAnalysisEngine.java
Author: tmill
Date: Fri Feb 7 20:46:36 2014
New Revision: 1565808
URL: http://svn.apache.org/r1565808
Log:
CTAKES-82: Add in list of feature configs and comment out cleartk-2.0-dependent code to get it to compile, hopefully.
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1565808&r1=1565807&r2=1565808&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Fri Feb 7 20:46:36 2014
@@ -90,7 +90,7 @@ import org.cleartk.classifier.jar.Generi
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
import org.cleartk.eval.Evaluation_ImplBase;
-import org.cleartk.ml.libsvm.tk.TKLIBSVMStringOutcomeDataWriter;
+//import org.cleartk.ml.libsvm.tk.TKLIBSVMStringOutcomeDataWriter;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.BooleanOptionHandler;
@@ -392,10 +392,10 @@ private static Logger logger = Logger.ge
}
Class<? extends DataWriter<String>> dw = null;
if(options.featConfig == FEATURE_CONFIG.STK || options.featConfig == FEATURE_CONFIG.PTK){
- dw = TKLIBSVMStringOutcomeDataWriter.class;
- }else{
- dw = LIBSVMStringOutcomeDataWriter.class;
+// dw = TKLIBSVMStringOutcomeDataWriter.class;
+ throw new UnsupportedOperationException("This requires cleartk-2.0 which");
}
+ dw = LIBSVMStringOutcomeDataWriter.class;
AssertionEvaluation evaluation = new AssertionEvaluation(
modelsDir,
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1565808&r1=1565807&r2=1565808&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Feb 7 20:46:36 2014
@@ -32,7 +32,6 @@ import java.util.Random;
import org.apache.commons.io.FilenameUtils;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
-import org.apache.ctakes.assertion.zoner.types.Zone;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -51,6 +50,7 @@ import org.apache.uima.resource.Resource
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.TreeFeature;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
@@ -61,11 +61,9 @@ import org.uimafit.descriptor.Configurat
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;
-//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
-
-
import scala.actors.threadpool.Arrays;
+//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
/**
* @author swu
@@ -77,6 +75,7 @@ public abstract class AssertionCleartkAn
Logger logger = Logger.getLogger(AssertionCleartkAnalysisEngine.class);
public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
+ public enum FEATURE_CONFIG {NO_SEM, NO_SYN, STK, STK_FRAGS, PTK, PTK_FRAGS, DEP_REGEX, DEP_REGEX_FRAGS, ALL_SYN}
public static int relationId; // counter for error logging
@@ -115,6 +114,13 @@ public abstract class AssertionCleartkAn
description = "the Chi-squared threshold at which features should be removed")
protected Float featureSelectionThreshold = 0f;
+ public static final String PARAM_FEATURE_CONFIG = "FEATURE_CONFIG";
+ @ConfigurationParameter(
+ name = PARAM_FEATURE_CONFIG,
+ description = "Feature configuration to use (for experiments)",
+ mandatory = false
+ )protected FEATURE_CONFIG featConfig = FEATURE_CONFIG.ALL_SYN;
+
public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";
@ConfigurationParameter(
@@ -162,6 +168,7 @@ public abstract class AssertionCleartkAn
protected List<CleartkExtractor> tokenContextFeatureExtractors;
protected List<CleartkExtractor> tokenCleartkExtractors;
protected List<SimpleFeatureExtractor> entityFeatureExtractors;
+ protected List<SimpleFeatureExtractor> entityTreeExtractors;
protected CleartkExtractor cuePhraseInWindowExtractor;
protected List<FeatureFunctionExtractor> featureFunctionExtractors;
@@ -219,6 +226,7 @@ public abstract class AssertionCleartkAn
new CleartkExtractor(
BaseToken.class,
// new FeatureFunctionExtractor(new CoveredTextExtractor(), new LowerCaseFeatureFunction()),
+// new FeatureFunctionExtractor(new CoveredTextExtractor(), new BrownClusterFeatureFunction()),
new CoveredTextExtractor(),
//new CleartkExtractor.Covered(),
new CleartkExtractor.LastCovered(2),
@@ -281,6 +289,7 @@ public abstract class AssertionCleartkAn
// set up FeatureFunction for all the laggard, non-Extractor features
ffDomainAdaptor = new FedaFeatureFunction( new ArrayList<String>(new HashSet<String>(fileToDomain.values())) );
}
+ entityTreeExtractors = new ArrayList<SimpleFeatureExtractor>();
}
@Override
@@ -333,8 +342,8 @@ public abstract class AssertionCleartkAn
// Map<IdentifiedAnnotation, Collection<Sentence>> coveringSentenceMap = JCasUtil.indexCovering(identifiedAnnotationView, IdentifiedAnnotation.class, Sentence.class);
// Map<Sentence, Collection<BaseToken>> tokensCoveredInSentenceMap = JCasUtil.indexCovered(identifiedAnnotationView, Sentence.class, BaseToken.class);
- Map<IdentifiedAnnotation, Collection<Zone>> coveringZoneMap =
- JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Zone.class);
+// Map<IdentifiedAnnotation, Collection<Zone>> coveringZoneMap =
+// JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Zone.class);
// Map<IdentifiedAnnotation, Collection<Sentence>> coveringSents =
// JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Sentence.class);
@@ -455,18 +464,22 @@ public abstract class AssertionCleartkAn
instance.addAll(extractor.extract(jCas, entityOrEventMention));
}
}
-
- List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityOrEventMention);
- if (zoneFeatures != null && !zoneFeatures.isEmpty())
- {
-// instance.addAll(zoneFeatures);
+
+ for (SimpleFeatureExtractor extractor : this.entityTreeExtractors) {
+ instance.addAll(extractor.extract(jCas, entityOrEventMention));
}
+
+// List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityOrEventMention);
+// if (zoneFeatures != null && !zoneFeatures.isEmpty())
+// {
+// instance.addAll(zoneFeatures);
+// }
List<Feature> feats = instance.getFeatures();
// List<Feature> lcFeats = new ArrayList<Feature>();
for(Feature feat : feats){
- if(feat.getName() != null && (feat.getName().startsWith("TreeFrag") || feat.getName().startsWith("WORD") || feat.getName().startsWith("NEG"))) continue;
+ if(feat instanceof TreeFeature || (feat.getName() != null && (feat.getName().startsWith("TreeFrag") || feat.getName().startsWith("WORD") || feat.getName().startsWith("NEG")))) continue;
if(feat.getName() != null && (feat.getName().contains("_TreeFrag") || feat.getName().contains("_WORD") || feat.getName().contains("_NEG"))) continue;
if(feat.getValue() instanceof String){
feat.setValue(((String)feat.getValue()).toLowerCase());
@@ -497,7 +510,8 @@ public abstract class AssertionCleartkAn
}
}
-
+
+ /*
public List<Feature> extractZoneFeatures(Map<IdentifiedAnnotation, Collection<Zone>> coveringZoneMap, IdentifiedAnnotation entityOrEventMention)
{
final Collection<Zone> zoneList = coveringZoneMap.get(entityOrEventMention);
@@ -522,6 +536,7 @@ public abstract class AssertionCleartkAn
return featureList;
}
+ */
public static AnalysisEngineDescription getDescription(Object... additionalConfiguration)
throws ResourceInitializationException {