You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/07/16 20:10:16 UTC
svn commit: r1503814 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/AssertionEvaluation.java train/CrossValidateChi2GridSearch.java
util/NegationDepRegex.java
Author: swu
Date: Tue Jul 16 18:10:15 2013
New Revision: 1503814
URL: http://svn.apache.org/r1503814
Log:
small mods to assertion for chi2 feature selection grid search
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java (with props)
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1503814&r1=1503813&r2=1503814&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Tue Jul 16 18:10:15 2013
@@ -238,9 +238,9 @@ private static Logger logger = Logger.ge
@Option(
name = "--feature-selection",
- usage = "Takes an argument: {c,m} corresponding to Chi-square or Mutual Information-based feature selection",
+ usage = "Takes an argument: the Chi^2 feature selection threshold",
required = false)
- public String featureSelectionAlgorithm = null;
+ public Float featureSelectionThreshold = null;
}
protected ArrayList<String> annotationTypes;
@@ -583,10 +583,10 @@ public static void printScore(Map<String
// builder.add(cuePhraseLookupAnnotator);
// Set up Feature Selection parameters
- Float featureSelectionThreshold = 0f;
+ Float featureSelectionThreshold = options.featureSelectionThreshold;
Class<? extends DataWriter> dataWriterClassFirstPass = getDataWriterClass();
- if (options.featureSelectionAlgorithm!=null) {
- featureSelectionThreshold = .1f;
+ if (options.featureSelectionThreshold==null) {
+ featureSelectionThreshold = 0f;
}
// Add each assertion Analysis Engine to the pipeline!
@@ -850,7 +850,7 @@ public static void printScore(Map<String
// run on existing output that has both system (or instance gathering) and gold
for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
- printViewNames("Views found by JCasIterable:", jCas);
+// printViewNames("Views found by JCasIterable:", jCas);
JCas goldView;
try {
@@ -953,7 +953,7 @@ public static void printScore(Map<String
}
protected void trainAndPackage(String currentAssertionAttribute, File directory, String[] arguments) throws Exception {
- if (options.featureSelectionAlgorithm!=null) {
+ if (options.featureSelectionThreshold!=null) {
// InstanceDataWriter.INSTANCES_OUTPUT_FILENAME = "training-data.liblinear";
// Extracting features and writing instances
Iterable<Instance<String>> instances = InstanceStream.loadFromDirectory(directory);
@@ -1019,7 +1019,7 @@ public static void printScore(Map<String
protected Class<? extends DataWriter> getDataWriterClass()
throws ResourceInitializationException {
- return (options.featureSelectionAlgorithm!=null)
+ return (options.featureSelectionThreshold!=null)
? InstanceDataWriter.class
: LIBLINEARStringOutcomeDataWriter.class;
}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java?rev=1503814&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java Tue Jul 16 18:10:15 2013
@@ -0,0 +1,51 @@
+package org.apache.ctakes.assertion.train;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+import org.apache.ctakes.assertion.util.AssertionConst;
+
+import scala.actors.threadpool.Arrays;
+
+public class CrossValidateChi2GridSearch {
+ // Calls AssertionEvaluation.main once per (chi2 threshold, annotation type) pair, always passing "--cross-validation" "5".
+ public static void main(String[] args) throws Exception {
+ // Threshold values passed in turn as the "--feature-selection" argument; the args parameter is never read.
+ float[] threshs = {0.0001f, 0.001f, 0.01f, 0.1f, 1f, 10f};
+ for (Float chi2threshold : threshs ) {
+ // For each threshold, iterate over every entry of AssertionConst.annotationTypes;
+ // the current entry is the one evaluated, all other entries get an "--ignore-" flag below.
+ for (String attribute : AssertionConst.annotationTypes) {
+ // Argument list for this run, assembled as flag/value strings in the order they are added.
+ ArrayList<String> params = new ArrayList<String>();
+
+ params.add("--train-dir"); params.add(AssertionConst.trainingDirectories.get(attribute));
+ params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
+ params.add("--cross-validation"); params.add("5");
+ params.add("--feature-selection"); params.add(Float.toString(chi2threshold));
+
+ // Build up an "ignore" string
+ for (String ignoreAttribute : AssertionConst.annotationTypes) {
+ if (!ignoreAttribute.equals(attribute)) {
+ // "historyOf" loses its last two characters, so the flag added is "--ignore-history" (presumably the option name AssertionEvaluation declares -- TODO confirm).
+ if (ignoreAttribute.equals("historyOf")) {
+ ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2);
+ }
+
+ params.add("--ignore-" + ignoreAttribute);
+ }
+ }
+ String[] paramList = params.toArray(new String[]{});
+
+ // System.out.println(Arrays.asList(paramList).toString());
+
+ // Run the actual assertion training on just one attribute
+ AssertionEvaluation.main( paramList );
+ }
+ }
+
+
+
+ }
+}
Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java?rev=1503814&r1=1503813&r2=1503814&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java Tue Jul 16 18:10:15 2013
@@ -521,10 +521,10 @@ public class NegationDepRegex {
// regexSet.add(regex_INNEG_NN);
// Print out the regexSet for the fun of it!
- System.out.println("### here are the regexes");
- for (DependencyRegex dreg : regexSet) {
- System.out.println(dreg.getName() + " :: " + dreg.toString());
- }
+// System.out.println("### here are the regexes");
+// for (DependencyRegex dreg : regexSet) {
+// System.out.println(dreg.getName() + " :: " + dreg.toString());
+// }
}