You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/07/16 20:10:16 UTC

svn commit: r1503814 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: eval/AssertionEvaluation.java train/CrossValidateChi2GridSearch.java util/NegationDepRegex.java

Author: swu
Date: Tue Jul 16 18:10:15 2013
New Revision: 1503814

URL: http://svn.apache.org/r1503814
Log:
small mods to assertion for chi2 feature selection grid search

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java   (with props)
Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1503814&r1=1503813&r2=1503814&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Tue Jul 16 18:10:15 2013
@@ -238,9 +238,9 @@ private static Logger logger = Logger.ge
 
     @Option(
     		name = "--feature-selection",
-    		usage = "Takes an argument: {c,m} corresponding to Chi-square or Mutual Information-based feature selection",
+    		usage = "Takes an argument: the Chi^2 feature selection threshold",
     		required = false)
-    public String featureSelectionAlgorithm = null;
+    public Float featureSelectionThreshold = null;
   }
   
   protected ArrayList<String> annotationTypes;
@@ -583,10 +583,10 @@ public static void printScore(Map<String
 //    builder.add(cuePhraseLookupAnnotator);
 
     // Set up Feature Selection parameters
-    Float featureSelectionThreshold = 0f;
+    Float featureSelectionThreshold = options.featureSelectionThreshold;
     Class<? extends DataWriter> dataWriterClassFirstPass = getDataWriterClass(); 
-    if (options.featureSelectionAlgorithm!=null) {
-    	featureSelectionThreshold = .1f;
+    if (options.featureSelectionThreshold==null) {
+    	featureSelectionThreshold = 0f;
     }
     
     // Add each assertion Analysis Engine to the pipeline!
@@ -850,7 +850,7 @@ public static void printScore(Map<String
     // run on existing output that has both system (or instance gathering) and gold
     for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
     	
-    	printViewNames("Views found by JCasIterable:", jCas);
+//    	printViewNames("Views found by JCasIterable:", jCas);
     	
       JCas goldView;
       try {
@@ -953,7 +953,7 @@ public static void printScore(Map<String
   }
 
   protected void trainAndPackage(String currentAssertionAttribute, File directory, String[] arguments) throws Exception {
-	  if (options.featureSelectionAlgorithm!=null) {
+	  if (options.featureSelectionThreshold!=null) {
 //		  InstanceDataWriter.INSTANCES_OUTPUT_FILENAME = "training-data.liblinear";
 		  // Extracting features and writing instances
 		  Iterable<Instance<String>> instances = InstanceStream.loadFromDirectory(directory);
@@ -1019,7 +1019,7 @@ public static void printScore(Map<String
   
   protected Class<? extends DataWriter> getDataWriterClass()
       throws ResourceInitializationException {
-    return (options.featureSelectionAlgorithm!=null)
+    return (options.featureSelectionThreshold!=null)
         ? InstanceDataWriter.class
         : LIBLINEARStringOutcomeDataWriter.class;
   }

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java?rev=1503814&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java Tue Jul 16 18:10:15 2013
@@ -0,0 +1,51 @@
+package org.apache.ctakes.assertion.train;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+import org.apache.ctakes.assertion.util.AssertionConst;
+
+import scala.actors.threadpool.Arrays;
+
+public class CrossValidateChi2GridSearch {
+	// Driver: calls AssertionEvaluation.main once per (threshold, annotation type) pair.
+	public static void main(String[] args) throws Exception {
+		// Candidate values handed to --feature-selection; each value gets a full pass over all attributes.
+		float[] threshs = {0.0001f, 0.001f, 0.01f, 0.1f, 1f, 10f};
+		for (Float chi2threshold : threshs ) {
+			// One AssertionEvaluation run per entry in AssertionConst.annotationTypes.
+
+			for (String attribute : AssertionConst.annotationTypes) {
+				// Command-line style argument list for this single run.
+				ArrayList<String> params = new ArrayList<String>();
+
+				params.add("--train-dir"); 			params.add(AssertionConst.trainingDirectories.get(attribute));
+				params.add("--models-dir"); 		params.add(AssertionConst.modelDirectory);
+				params.add("--cross-validation"); 	params.add("5");
+				params.add("--feature-selection");	params.add(Float.toString(chi2threshold));
+
+				// Add one "--ignore-<attribute>" flag for every attribute other than the current one
+				for (String ignoreAttribute : AssertionConst.annotationTypes) {
+					if (!ignoreAttribute.equals(attribute)) { 
+						// "historyOf" loses its last two characters, so its flag becomes "--ignore-history".
+						if (ignoreAttribute.equals("historyOf")) {
+							ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2);
+						}
+
+						params.add("--ignore-" + ignoreAttribute);
+					}
+				}
+				String[] paramList = params.toArray(new String[]{});
+				// NOTE(review): the disabled debug print below is the only place the imported Arrays class is referenced.
+				//			System.out.println(Arrays.asList(paramList).toString());
+
+				// Run the actual assertion training on just one attribute
+				AssertionEvaluation.main( paramList );
+			}
+		}
+		
+		
+		
+	}
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java?rev=1503814&r1=1503813&r2=1503814&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java Tue Jul 16 18:10:15 2013
@@ -521,10 +521,10 @@ public class NegationDepRegex {
 //		regexSet.add(regex_INNEG_NN);
 	
 		// Print out the regexSet for the fun of it!
-		System.out.println("### here are the regexes");
-		for (DependencyRegex dreg : regexSet) {
-			System.out.println(dreg.getName() + " :: " + dreg.toString());
-		}
+//		System.out.println("### here are the regexes");
+//		for (DependencyRegex dreg : regexSet) {
+//			System.out.println(dreg.getName() + " :: " + dreg.toString());
+//		}
 		
 	}