Posted to commits@ctakes.apache.org by st...@apache.org on 2012/12/19 22:49:47 UTC

svn commit: r1424157 [1/3] - in /incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/ ae/feature/selection/ eval/

Author: stevenbethard
Date: Wed Dec 19 21:49:46 2012
New Revision: 1424157

URL: http://svn.apache.org/viewvc?rev=1424157&view=rev
Log:
Fixes svn:eol-style for .java files in ctakes-temporal
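
A property fix like this is typically applied with the stock svn client by setting svn:eol-style on each affected file and committing the property change. A minimal illustration, using one of the paths listed below (working-copy layout and commit message are assumptions, not taken from this commit):

    svn propset svn:eol-style native incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
    svn commit -m "Fixes svn:eol-style for .java files in ctakes-temporal"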

Modified:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2NeighborFSExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/FeatureSelectionExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/MutualInformationFeatureSelectionExtractor.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java   (contents, props changed)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java   (contents, props changed)

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1424157&r1=1424156&r2=1424157&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Wed Dec 19 21:49:46 2012
@@ -1,380 +1,380 @@
-package org.apache.ctakes.temporal.ae;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-//import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-//import java.util.logging.Logger;
-
-//import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
-import org.apache.ctakes.temporal.ae.feature.PhraseExtractor;
-import org.apache.ctakes.temporal.ae.feature.SRLExtractor;
-import org.apache.ctakes.temporal.ae.feature.SurfaceFormFeatureExtractor;
-import org.apache.ctakes.temporal.ae.feature.selection.Chi2NeighborFSExtractor;
-import org.apache.ctakes.typesystem.type.constants.CONST;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.cleartk.classifier.CleartkAnnotator;
-//import org.cleartk.classifier.DataWriter;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.Instance;
-//import org.cleartk.classifier.feature.transform.InstanceDataWriter;
-import org.cleartk.classifier.chunking.BIOChunking;
-import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
-import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
-import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
-import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
-import org.cleartk.classifier.jar.DefaultDataWriterFactory;
-import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
-import org.cleartk.classifier.jar.GenericJarClassifierFactory;
-import org.uimafit.factory.AnalysisEngineFactory;
-import org.uimafit.factory.ConfigurationParameterFactory;
-import org.uimafit.descriptor.ConfigurationParameter;
-import org.uimafit.util.JCasUtil;
-
-//import com.google.common.base.Charsets;
-import com.google.common.base.Predicate;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-//import com.google.common.io.LineProcessor;
-//import com.google.common.io.Resources;
-
-public class EventAnnotator extends CleartkAnnotator<String> {
-
-  public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
-
-  @ConfigurationParameter(
-			name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-			mandatory = false,
-			description = "probability that a negative example should be retained for training")
-  protected Float probabilityOfKeepingANegativeExample = 0.8f;
-  
-  public static final String PARAM_FEATURE_TRIM_ORNOT = "WhetherToDoFeatureSelection";
-
-  @ConfigurationParameter(
-			name = PARAM_FEATURE_TRIM_ORNOT,
-			mandatory = false,
-			description = "set whether feature selection is used or not")
-  public static Float featureTrim = 0f;
-  
-  public static AnalysisEngineDescription createDataWriterDescription(
-      String dataWriterName,
-      File outputDirectory, float downratio, float featureSelect) throws ResourceInitializationException {
-    return AnalysisEngineFactory.createPrimitiveDescription(
-        EventAnnotator.class,
-        CleartkAnnotator.PARAM_IS_TRAINING,
-        true,
-        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
-        dataWriterName,
-        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
-        outputDirectory,
-        EventAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-        downratio,
-        EventAnnotator.PARAM_FEATURE_TRIM_ORNOT,
-        featureSelect);
-  }
-
-  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
-      throws ResourceInitializationException {
-	 AnalysisEngineDescription fsEventAnnotator =AnalysisEngineFactory.createPrimitiveDescription(
-        EventAnnotator.class,
-        CleartkAnnotator.PARAM_IS_TRAINING,
-        false,
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        new File(modelDirectory, "model.jar"));
-	 ConfigurationParameterFactory.addConfigurationParameter(
-		fsEventAnnotator,	
-		EventAnnotator.PARAM_NB_FS_URI,
-		EventAnnotator.createNbFSURI(modelDirectory) );
-	 
-     return(fsEventAnnotator);
-  }
-
-  protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
-
-  protected List<CleartkExtractor> contextFeatureExtractors;
-
-  private BIOChunking<BaseToken, EntityMention> entityChunking;
-
-  private BIOChunking<BaseToken, EventMention> eventChunking;
-  
-  public static final String PARAM_NB_FS_URI = ConfigurationParameterFactory.createConfigurationParameterName(
-		      EventAnnotator.class,
-		      "neighborFsUri");
-
-	  @ConfigurationParameter(
-		  mandatory = false,
-		  description = "provides a URI where the neighbor annotation's feature selection data will be written")
-	  protected URI neighborFsUri;
-		    
-  public static final String FS_NEIGHBOR_EXTRACTOR_KEY = "SelectNeighborFeatures";
-
-  private Chi2NeighborFSExtractor<String> chi2NeighborFsExtractor;
-  
-  
-  public static URI createNbFSURI(File outputDirectoryName) {
-	    File f = new File(outputDirectoryName, FS_NEIGHBOR_EXTRACTOR_KEY + "_Chi2_extractor.dat");
-	    return f.toURI();
-	  }
-  //*****feature selection related parameters
-
-  @Override
-  public void initialize(UimaContext context) throws ResourceInitializationException {
-    super.initialize(context);
-
-    // define chunkings
-    this.entityChunking = new BIOChunking<BaseToken, EntityMention>(
-        BaseToken.class,
-        EntityMention.class,
-        "typeID");
-    this.eventChunking = new BIOChunking<BaseToken, EventMention>(
-        BaseToken.class,
-        EventMention.class);
-      
-    //configure FS extractor:
-    if (featureTrim > 0){//if feature selection
-        CombinedExtractor forneighbors    = new CombinedExtractor(
-    			new CoveredTextExtractor(),
-    			new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
-    			new TypePathExtractor(BaseToken.class, "partOfSpeech"),
-    			new SurfaceFormFeatureExtractor(),
-    	      	new PhraseExtractor(),
-    	      	new SRLExtractor());
-        
-        try {
-    		this.chi2NeighborFsExtractor = initNbFSExtractor(forneighbors);
-    	} catch (IOException e) {
-    		e.printStackTrace();
-    	}
-    }else{//if no feature selection
-        // add features: word, stem, pos and more
-        this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
-        // try {
-        this.tokenFeatureExtractors.addAll(Arrays.asList(
-        		new CoveredTextExtractor(),
-//            	new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
-//            	new TypePathExtractor(BaseToken.class, "partOfSpeech"),//);
-//            	new SurfaceFormFeatureExtractor(),
-//            	new PhraseExtractor(),
-            	new SRLExtractor()));
-        		// new CoveredTextToValuesExtractor("ACF", StringToDoublesProcessor.parse("/word_freq.lst")),
-        		// new CoveredTextToValuesExtractor("PCA", StringToDoublesProcessor.parse("/word_pca.lst")),
-        		// new CoveredTextToValuesExtractor("TimPCA", StringToDoublesProcessor.parse("/tim_word_pca.txt"))));
-
-        //add window of features before and after
-        CombinedExtractor subExtractor = new CombinedExtractor(
-        	new CoveredTextExtractor(),
-        	new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
-        	new TypePathExtractor(BaseToken.class, "partOfSpeech"),//);
-        	new SurfaceFormFeatureExtractor(),
-        	new SRLExtractor());
-
-        this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-        this.contextFeatureExtractors.add(new CleartkExtractor(
-        	BaseToken.class,
-        	subExtractor,
-        	new Preceding(3),
-        	new Following(3)));
-    }
-
-
-  }
-
-
-private Chi2NeighborFSExtractor<String> initNbFSExtractor(
-		CombinedExtractor subextractor) throws IOException{
-
-	Chi2NeighborFSExtractor<String> chi2NbFSExtractor = new  Chi2NeighborFSExtractor<String>(EventAnnotator.FS_NEIGHBOR_EXTRACTOR_KEY, BaseToken.class, subextractor, featureTrim, new Preceding(4),
-	    	new Following(4)); //the 3rd last parameter is used to control chi2 threshold, the last two are used to control window size
-	
-	if (this.neighborFsUri != null) {
-		chi2NbFSExtractor.load(this.neighborFsUri);
-	    }
-	return chi2NbFSExtractor;
-}
-
-
-  @Override
-  public void process(JCas jCas) throws AnalysisEngineProcessException {
-	  
-	Random rand = new Random();
-    // classify tokens within each sentence
-    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
-      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
-
-      // during training, the list of all outcomes for the tokens
-      List<String> outcomes;
-      if (this.isTraining()) {
-        List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, sentence);
-        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
-      }
-      // during prediction, the list of outcomes predicted so far
-      else {
-        outcomes = new ArrayList<String>();
-      }
-
-      // get BIO entity tags for each entity type
-      int[] entityTypeIDs = new int[] {
-          CONST.NE_TYPE_ID_ANATOMICAL_SITE,
-          CONST.NE_TYPE_ID_DISORDER,
-          CONST.NE_TYPE_ID_DRUG,
-          CONST.NE_TYPE_ID_FINDING,
-          CONST.NE_TYPE_ID_PROCEDURE,
-          CONST.NE_TYPE_ID_UNKNOWN };
-      List<EntityMention> entities = JCasUtil.selectCovered(jCas, EntityMention.class, sentence);
-      Map<Integer, List<String>> entityTagsByType = new HashMap<Integer, List<String>>();
-      for (int typeID : entityTypeIDs) {
-        Predicate<EntityMention> hasTypeID = hasEntityType(typeID);
-        List<EntityMention> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
-        entityTagsByType.put(typeID, this.entityChunking.createOutcomes(jCas, tokens, subEntities));
-      }
-
-      // extract features for all tokens
-      int tokenIndex = -1;
-      int window = 2;
-      int nPreviousClassifications = 2;
-      
-      for (BaseToken token : tokens) {
-        ++tokenIndex;
-
-        List<Feature> features = new ArrayList<Feature>();
-        
-        if (featureTrim >0 ){//if feature selection
-        	features.addAll(this.chi2NeighborFsExtractor.extract(jCas, token)); //base features
-        	features.addAll(this.chi2NeighborFsExtractor.extractWithin(jCas, token, sentence)); //neighbor features
-        	features.addAll(this.chi2NeighborFsExtractor.extract(entityTypeIDs, entityTagsByType,tokenIndex, window)); // features from surrounding entities
-        	features.addAll(this.chi2NeighborFsExtractor.extract(nPreviousClassifications, tokenIndex, outcomes)); //features from previous classifications
-        }else{ //if no feature selection
-        	// features from token attributes
-            for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
-              features.addAll(extractor.extract(jCas, token));
-            }
-            // features from surrounding tokens
-            for (CleartkExtractor extractor : this.contextFeatureExtractors) {
-              features.addAll(extractor.extractWithin(jCas, token, sentence));
-            }
-            // features from surrounding entities
-            for (int typeID : entityTypeIDs) {
-              List<String> tokenEntityTags = entityTagsByType.get(typeID);
-              int begin = Math.max(tokenIndex - window, 0);
-              int end = Math.min(tokenIndex + window, tokenEntityTags.size());
-              for (int i = begin; i < end; ++i) {
-                String name = String.format("EntityTag_%d_%d", typeID, i - begin);
-                features.add(new Feature(name, tokenEntityTags.get(i)));
-              }
-            }
-            // features from previous classifications
-            for (int i = nPreviousClassifications; i > 0; --i) {
-              int index = tokenIndex - i;
-              String previousOutcome = index < 0 ? "O" : outcomes.get(index);
-              features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
-            }
-        }
-        
-        // if training, write to data file
-        if (this.isTraining()) {
-            String outcome = outcomes.get(tokenIndex);
-            if(outcome.equals("O")){ //if it is an "O". downsample it
-          	  if (rand.nextDouble()<=probabilityOfKeepingANegativeExample)
-          		  this.dataWriter.write(new Instance<String>(outcome, features));
-            }else {
-          	  this.dataWriter.write(new Instance<String>(outcome, features));
-            }
-          }
-
-        // if predicting, add prediction to outcomes
-        else {
-          outcomes.add(this.classifier.classify(features));
-        }
-      }
-
-      // during prediction, convert chunk labels to events and add them to the CAS
-      if (!this.isTraining()) {
-        this.eventChunking.createChunks(jCas, tokens, outcomes);
-      }
-    }
-  }
-
-  private static Predicate<EntityMention> hasEntityType(final int typeID) {
-    return new Predicate<EntityMention>() {
-      public boolean apply(EntityMention mention) {
-        return mention.getTypeID() == typeID;
-      }
-    };
-  }
-
-//  private static class StringToDoublesProcessor implements LineProcessor<Map<String, double[]>> {
-//    private Logger logger = Logger.getLogger(this.getClass().getName());
-//
-//    private Map<String, double[]> result = new HashMap<String, double[]>();
-//
-//    private int length = -1;
-//
-//    @Override
-//    public Map<String, double[]> getResult() {
-//      return this.result;
-//    }
-//
-//    @Override
-//    public boolean processLine(String line) throws IOException {
-//      String[] parts = line.trim().split(",");
-//      String key = parts[0];
-//      int partsOffset = 0;
-//      if (this.length == -1) {
-//        this.length = parts.length;
-//      } else if (parts.length != this.length) {
-//        String message = "expected %d parts, found %d, skipping line '%s'";
-//        this.logger.warning(String.format(message, this.length, parts.length, line));
-//        return true;
-//      }
-//      double[] values = new double[parts.length - 1];
-//      for (int i = 0; i < values.length; ++i) {
-//        values[i] = Double.parseDouble(parts[i + 1 + partsOffset]);
-//      }
-//      this.result.put(key, values);
-//      return true;
-//    }
-//  }
-
-
-public Chi2NeighborFSExtractor<String> getChi2NbSubExtractor() {
-	return this.chi2NeighborFsExtractor;
-}
-}
+package org.apache.ctakes.temporal.ae;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+//import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+//import java.util.logging.Logger;
+
+//import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
+import org.apache.ctakes.temporal.ae.feature.PhraseExtractor;
+import org.apache.ctakes.temporal.ae.feature.SRLExtractor;
+import org.apache.ctakes.temporal.ae.feature.SurfaceFormFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.selection.Chi2NeighborFSExtractor;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+//import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+//import org.cleartk.classifier.feature.transform.InstanceDataWriter;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.ConfigurationParameterFactory;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
+
+//import com.google.common.base.Charsets;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+//import com.google.common.io.LineProcessor;
+//import com.google.common.io.Resources;
+
+public class EventAnnotator extends CleartkAnnotator<String> {
+
+  public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
+
+  @ConfigurationParameter(
+			name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+			mandatory = false,
+			description = "probability that a negative example should be retained for training")
+  protected Float probabilityOfKeepingANegativeExample = 0.8f;
+  
+  public static final String PARAM_FEATURE_TRIM_ORNOT = "WhetherToDoFeatureSelection";
+
+  @ConfigurationParameter(
+			name = PARAM_FEATURE_TRIM_ORNOT,
+			mandatory = false,
+			description = "set whether feature selection is used or not")
+  public static Float featureTrim = 0f;
+  
+  public static AnalysisEngineDescription createDataWriterDescription(
+      String dataWriterName,
+      File outputDirectory, float downratio, float featureSelect) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterName,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory,
+        EventAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+        downratio,
+        EventAnnotator.PARAM_FEATURE_TRIM_ORNOT,
+        featureSelect);
+  }
+
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+	 AnalysisEngineDescription fsEventAnnotator =AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+	 ConfigurationParameterFactory.addConfigurationParameter(
+		fsEventAnnotator,	
+		EventAnnotator.PARAM_NB_FS_URI,
+		EventAnnotator.createNbFSURI(modelDirectory) );
+	 
+     return(fsEventAnnotator);
+  }
+
+  protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
+
+  protected List<CleartkExtractor> contextFeatureExtractors;
+
+  private BIOChunking<BaseToken, EntityMention> entityChunking;
+
+  private BIOChunking<BaseToken, EventMention> eventChunking;
+  
+  public static final String PARAM_NB_FS_URI = ConfigurationParameterFactory.createConfigurationParameterName(
+		      EventAnnotator.class,
+		      "neighborFsUri");
+
+	  @ConfigurationParameter(
+		  mandatory = false,
+		  description = "provides a URI where the neighbor annotation's feature selection data will be written")
+	  protected URI neighborFsUri;
+		    
+  public static final String FS_NEIGHBOR_EXTRACTOR_KEY = "SelectNeighborFeatures";
+
+  private Chi2NeighborFSExtractor<String> chi2NeighborFsExtractor;
+  
+  
+  public static URI createNbFSURI(File outputDirectoryName) {
+	    File f = new File(outputDirectoryName, FS_NEIGHBOR_EXTRACTOR_KEY + "_Chi2_extractor.dat");
+	    return f.toURI();
+	  }
+  //*****feature selection related parameters
+
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+
+    // define chunkings
+    this.entityChunking = new BIOChunking<BaseToken, EntityMention>(
+        BaseToken.class,
+        EntityMention.class,
+        "typeID");
+    this.eventChunking = new BIOChunking<BaseToken, EventMention>(
+        BaseToken.class,
+        EventMention.class);
+      
+    //configure FS extractor:
+    if (featureTrim > 0){//if feature selection
+        CombinedExtractor forneighbors    = new CombinedExtractor(
+    			new CoveredTextExtractor(),
+    			new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+    			new TypePathExtractor(BaseToken.class, "partOfSpeech"),
+    			new SurfaceFormFeatureExtractor(),
+    	      	new PhraseExtractor(),
+    	      	new SRLExtractor());
+        
+        try {
+    		this.chi2NeighborFsExtractor = initNbFSExtractor(forneighbors);
+    	} catch (IOException e) {
+    		e.printStackTrace();
+    	}
+    }else{//if no feature selection
+        // add features: word, stem, pos and more
+        this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+        // try {
+        this.tokenFeatureExtractors.addAll(Arrays.asList(
+        		new CoveredTextExtractor(),
+//            	new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+//            	new TypePathExtractor(BaseToken.class, "partOfSpeech"),//);
+//            	new SurfaceFormFeatureExtractor(),
+//            	new PhraseExtractor(),
+            	new SRLExtractor()));
+        		// new CoveredTextToValuesExtractor("ACF", StringToDoublesProcessor.parse("/word_freq.lst")),
+        		// new CoveredTextToValuesExtractor("PCA", StringToDoublesProcessor.parse("/word_pca.lst")),
+        		// new CoveredTextToValuesExtractor("TimPCA", StringToDoublesProcessor.parse("/tim_word_pca.txt"))));
+
+        //add window of features before and after
+        CombinedExtractor subExtractor = new CombinedExtractor(
+        	new CoveredTextExtractor(),
+        	new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+        	new TypePathExtractor(BaseToken.class, "partOfSpeech"),//);
+        	new SurfaceFormFeatureExtractor(),
+        	new SRLExtractor());
+
+        this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+        this.contextFeatureExtractors.add(new CleartkExtractor(
+        	BaseToken.class,
+        	subExtractor,
+        	new Preceding(3),
+        	new Following(3)));
+    }
+
+
+  }
+
+
+private Chi2NeighborFSExtractor<String> initNbFSExtractor(
+		CombinedExtractor subextractor) throws IOException{
+
+	Chi2NeighborFSExtractor<String> chi2NbFSExtractor = new  Chi2NeighborFSExtractor<String>(EventAnnotator.FS_NEIGHBOR_EXTRACTOR_KEY, BaseToken.class, subextractor, featureTrim, new Preceding(4),
+	    	new Following(4)); //the 3rd last parameter is used to control chi2 threshold, the last two are used to control window size
+	
+	if (this.neighborFsUri != null) {
+		chi2NbFSExtractor.load(this.neighborFsUri);
+	    }
+	return chi2NbFSExtractor;
+}
+
+
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+	  
+	Random rand = new Random();
+    // classify tokens within each sentence
+    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+      // during training, the list of all outcomes for the tokens
+      List<String> outcomes;
+      if (this.isTraining()) {
+        List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, sentence);
+        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
+      }
+      // during prediction, the list of outcomes predicted so far
+      else {
+        outcomes = new ArrayList<String>();
+      }
+
+      // get BIO entity tags for each entity type
+      int[] entityTypeIDs = new int[] {
+          CONST.NE_TYPE_ID_ANATOMICAL_SITE,
+          CONST.NE_TYPE_ID_DISORDER,
+          CONST.NE_TYPE_ID_DRUG,
+          CONST.NE_TYPE_ID_FINDING,
+          CONST.NE_TYPE_ID_PROCEDURE,
+          CONST.NE_TYPE_ID_UNKNOWN };
+      List<EntityMention> entities = JCasUtil.selectCovered(jCas, EntityMention.class, sentence);
+      Map<Integer, List<String>> entityTagsByType = new HashMap<Integer, List<String>>();
+      for (int typeID : entityTypeIDs) {
+        Predicate<EntityMention> hasTypeID = hasEntityType(typeID);
+        List<EntityMention> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
+        entityTagsByType.put(typeID, this.entityChunking.createOutcomes(jCas, tokens, subEntities));
+      }
+
+      // extract features for all tokens
+      int tokenIndex = -1;
+      int window = 2;
+      int nPreviousClassifications = 2;
+      
+      for (BaseToken token : tokens) {
+        ++tokenIndex;
+
+        List<Feature> features = new ArrayList<Feature>();
+        
+        if (featureTrim >0 ){//if feature selection
+        	features.addAll(this.chi2NeighborFsExtractor.extract(jCas, token)); //base features
+        	features.addAll(this.chi2NeighborFsExtractor.extractWithin(jCas, token, sentence)); //neighbor features
+        	features.addAll(this.chi2NeighborFsExtractor.extract(entityTypeIDs, entityTagsByType,tokenIndex, window)); // features from surrounding entities
+        	features.addAll(this.chi2NeighborFsExtractor.extract(nPreviousClassifications, tokenIndex, outcomes)); //features from previous classifications
+        }else{ //if no feature selection
+        	// features from token attributes
+            for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
+              features.addAll(extractor.extract(jCas, token));
+            }
+            // features from surrounding tokens
+            for (CleartkExtractor extractor : this.contextFeatureExtractors) {
+              features.addAll(extractor.extractWithin(jCas, token, sentence));
+            }
+            // features from surrounding entities
+            for (int typeID : entityTypeIDs) {
+              List<String> tokenEntityTags = entityTagsByType.get(typeID);
+              int begin = Math.max(tokenIndex - window, 0);
+              int end = Math.min(tokenIndex + window, tokenEntityTags.size());
+              for (int i = begin; i < end; ++i) {
+                String name = String.format("EntityTag_%d_%d", typeID, i - begin);
+                features.add(new Feature(name, tokenEntityTags.get(i)));
+              }
+            }
+            // features from previous classifications
+            for (int i = nPreviousClassifications; i > 0; --i) {
+              int index = tokenIndex - i;
+              String previousOutcome = index < 0 ? "O" : outcomes.get(index);
+              features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
+            }
+        }
+        
+        // if training, write to data file
+        if (this.isTraining()) {
+            String outcome = outcomes.get(tokenIndex);
+            if(outcome.equals("O")){ //if it is an "O". downsample it
+          	  if (rand.nextDouble()<=probabilityOfKeepingANegativeExample)
+          		  this.dataWriter.write(new Instance<String>(outcome, features));
+            }else {
+          	  this.dataWriter.write(new Instance<String>(outcome, features));
+            }
+          }
+
+        // if predicting, add prediction to outcomes
+        else {
+          outcomes.add(this.classifier.classify(features));
+        }
+      }
+
+      // during prediction, convert chunk labels to events and add them to the CAS
+      if (!this.isTraining()) {
+        this.eventChunking.createChunks(jCas, tokens, outcomes);
+      }
+    }
+  }
+
+  private static Predicate<EntityMention> hasEntityType(final int typeID) {
+    return new Predicate<EntityMention>() {
+      public boolean apply(EntityMention mention) {
+        return mention.getTypeID() == typeID;
+      }
+    };
+  }
+
+//  private static class StringToDoublesProcessor implements LineProcessor<Map<String, double[]>> {
+//    private Logger logger = Logger.getLogger(this.getClass().getName());
+//
+//    private Map<String, double[]> result = new HashMap<String, double[]>();
+//
+//    private int length = -1;
+//
+//    @Override
+//    public Map<String, double[]> getResult() {
+//      return this.result;
+//    }
+//
+//    @Override
+//    public boolean processLine(String line) throws IOException {
+//      String[] parts = line.trim().split(",");
+//      String key = parts[0];
+//      int partsOffset = 0;
+//      if (this.length == -1) {
+//        this.length = parts.length;
+//      } else if (parts.length != this.length) {
+//        String message = "expected %d parts, found %d, skipping line '%s'";
+//        this.logger.warning(String.format(message, this.length, parts.length, line));
+//        return true;
+//      }
+//      double[] values = new double[parts.length - 1];
+//      for (int i = 0; i < values.length; ++i) {
+//        values[i] = Double.parseDouble(parts[i + 1 + partsOffset]);
+//      }
+//      this.result.put(key, values);
+//      return true;
+//    }
+//  }
+
+
+public Chi2NeighborFSExtractor<String> getChi2NbSubExtractor() {
+	return this.chi2NeighborFsExtractor;
+}
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native
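
The property value above is what svn records per file. As a side note (an assumption about client-side workflow, not something this commit configures), the auto-props mechanism in ~/.subversion/config can apply the same property to newly added .java files automatically, so they do not need the same fix later:

    [miscellany]
    enable-auto-props = yes

    [auto-props]
    *.java = svn:eol-style=native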

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java?rev=1424157&r1=1424156&r2=1424157&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java Wed Dec 19 21:49:46 2012
@@ -1,73 +1,73 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.temporal.ae.feature;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-
-public class CoveredTextToValuesExtractor implements SimpleFeatureExtractor {
-
-  private String name;
-
-  private Map<String, double[]> textDoublesMap;
-
-  private double[] meanValues;
-
-  public CoveredTextToValuesExtractor(String name, Map<String, double[]> textDoublesMap) {
-    super();
-    this.name = name;
-    this.textDoublesMap = textDoublesMap;
-    int nMapEntries = this.textDoublesMap.size();
-    if (nMapEntries == 0) {
-      throw new IllegalArgumentException("textDoublesMap cannot be empty");
-    }
-    int nValues = textDoublesMap.entrySet().iterator().next().getValue().length;
-    this.meanValues = new double[nValues];
-    for (double[] values : textDoublesMap.values()) {
-      for (int i = 0; i < values.length; ++i) {
-        this.meanValues[i] += values[i];
-      }
-    }
-    for (int i = 0; i < this.meanValues.length; ++i) {
-      this.meanValues[i] /= nMapEntries;
-    }
-  }
-
-  @Override
-  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
-    double[] values = this.textDoublesMap.get(annotation.getCoveredText());
-    if (values == null) {
-      values = this.meanValues;
-    }
-    ArrayList<Feature> features = new ArrayList<Feature>();
-    for (int i = 0; i < values.length; ++i) {
-      String featureName = Feature.createName(this.name, String.valueOf(i));
-      features.add(new Feature(featureName, values[i]));
-    }
-    return features;
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+
+public class CoveredTextToValuesExtractor implements SimpleFeatureExtractor {
+
+  private String name;
+
+  private Map<String, double[]> textDoublesMap;
+
+  private double[] meanValues;
+
+  public CoveredTextToValuesExtractor(String name, Map<String, double[]> textDoublesMap) {
+    super();
+    this.name = name;
+    this.textDoublesMap = textDoublesMap;
+    int nMapEntries = this.textDoublesMap.size();
+    if (nMapEntries == 0) {
+      throw new IllegalArgumentException("textDoublesMap cannot be empty");
+    }
+    int nValues = textDoublesMap.entrySet().iterator().next().getValue().length;
+    this.meanValues = new double[nValues];
+    for (double[] values : textDoublesMap.values()) {
+      for (int i = 0; i < values.length; ++i) {
+        this.meanValues[i] += values[i];
+      }
+    }
+    for (int i = 0; i < this.meanValues.length; ++i) {
+      this.meanValues[i] /= nMapEntries;
+    }
+  }
+
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
+    double[] values = this.textDoublesMap.get(annotation.getCoveredText());
+    if (values == null) {
+      values = this.meanValues;
+    }
+    ArrayList<Feature> features = new ArrayList<Feature>();
+    for (int i = 0; i < values.length; ++i) {
+      String featureName = Feature.createName(this.name, String.valueOf(i));
+      features.add(new Feature(featureName, values[i]));
+    }
+    return features;
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java?rev=1424157&r1=1424156&r2=1424157&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java Wed Dec 19 21:49:46 2012
@@ -1,50 +1,50 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.temporal.ae.feature;
-
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.ctakes.typesystem.type.syntax.Chunk;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-import org.uimafit.util.JCasUtil;
-
-public class PhraseExtractor implements SimpleFeatureExtractor {
-
-  @Override
-  public List<Feature> extract(JCas jCas, Annotation token) throws CleartkExtractorException {
-    String featureValue = "NotNPVP";
-    for (Chunk chunk : JCasUtil.selectCovered(jCas, Chunk.class, token)) {
-      String chunkType = chunk.getChunkType();
-      if (chunkType.equals("NP")) {
-        featureValue = "NP";
-        break;
-      } else if (chunkType.equals("VP")) {
-        featureValue = "VP";
-        break;
-      }
-    }
-    return Collections.singletonList(new Feature("PhraseType", featureValue));
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class PhraseExtractor implements SimpleFeatureExtractor {
+
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation token) throws CleartkExtractorException {
+    String featureValue = "NotNPVP";
+    for (Chunk chunk : JCasUtil.selectCovered(jCas, Chunk.class, token)) {
+      String chunkType = chunk.getChunkType();
+      if (chunkType.equals("NP")) {
+        featureValue = "NP";
+        break;
+      } else if (chunkType.equals("VP")) {
+        featureValue = "VP";
+        break;
+      }
+    }
+    return Collections.singletonList(new Feature("PhraseType", featureValue));
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java?rev=1424157&r1=1424156&r2=1424157&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java Wed Dec 19 21:49:46 2012
@@ -1,88 +1,88 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.temporal.ae.feature;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.Predicate;
-import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
-import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-import org.uimafit.util.JCasUtil;
-
-public class SRLExtractor implements SimpleFeatureExtractor {
-
-  @Override
-  public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
-      throws CleartkExtractorException {
-    // and cache the results so that we only do this once per CAS
-	String jCasText = jCas.getDocumentText();
-	String roleFeat = "SemanticRole";
-	String roleVerbFeat = "RoleAndVerb";
-	String verb = "noVerb";
-    Feature role = new Feature(roleFeat, "NoRole");
-    Feature roleVerb = new Feature(roleVerbFeat, "NoRole"+verb);
-    ArrayList<Feature> features = new ArrayList<Feature>();
-    for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
-
-      for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
-        if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
-          role = new Feature(roleFeat,"Predicate");
-          verb = jCasText.substring(predicate.getBegin(), predicate.getEnd());
-          roleVerb = new Feature(roleVerbFeat, "Predicate::"+verb);
-          
-          features.add(role);
-          features.add(roleVerb);
-          return features;
-        }
-      }
-
-      for (SemanticRoleRelation relation : JCasUtil.select(
-          predicate.getRelations(),
-          SemanticRoleRelation.class)) {
-        SemanticArgument arg = relation.getArgument();
-        // System.out.format("\tArg: %s=%s \n", arg.getLabel(), arg.getCoveredText());
-        for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg)) {
-          if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
-            String label = arg.getLabel();
-            Predicate currentPred = relation.getPredicate();
-            verb = jCasText.substring(currentPred.getBegin(), currentPred.getEnd());
-            role = new Feature(roleFeat, label);
-            roleVerb = new Feature(roleVerbFeat, label+"::"+verb);
-            
-            features.add(role);
-            features.add(roleVerb);
-            return features;
-          }
-        }
-      }
-    }
-
-    features.add(role);
-    features.add(roleVerb);
-    return features;
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class SRLExtractor implements SimpleFeatureExtractor {
+
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
+      throws CleartkExtractorException {
+    // and cache the results so that we only do this once per CAS
+	String jCasText = jCas.getDocumentText();
+	String roleFeat = "SemanticRole";
+	String roleVerbFeat = "RoleAndVerb";
+	String verb = "noVerb";
+    Feature role = new Feature(roleFeat, "NoRole");
+    Feature roleVerb = new Feature(roleVerbFeat, "NoRole"+verb);
+    ArrayList<Feature> features = new ArrayList<Feature>();
+    for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
+
+      for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
+        if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
+          role = new Feature(roleFeat,"Predicate");
+          verb = jCasText.substring(predicate.getBegin(), predicate.getEnd());
+          roleVerb = new Feature(roleVerbFeat, "Predicate::"+verb);
+          
+          features.add(role);
+          features.add(roleVerb);
+          return features;
+        }
+      }
+
+      for (SemanticRoleRelation relation : JCasUtil.select(
+          predicate.getRelations(),
+          SemanticRoleRelation.class)) {
+        SemanticArgument arg = relation.getArgument();
+        // System.out.format("\tArg: %s=%s \n", arg.getLabel(), arg.getCoveredText());
+        for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg)) {
+          if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
+            String label = arg.getLabel();
+            Predicate currentPred = relation.getPredicate();
+            verb = jCasText.substring(currentPred.getBegin(), currentPred.getEnd());
+            role = new Feature(roleFeat, label);
+            roleVerb = new Feature(roleVerbFeat, label+"::"+verb);
+            
+            features.add(role);
+            features.add(roleVerb);
+            return features;
+          }
+        }
+      }
+    }
+
+    features.add(role);
+    features.add(roleVerb);
+    return features;
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java?rev=1424157&r1=1424156&r2=1424157&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java Wed Dec 19 21:49:46 2012
@@ -1,68 +1,68 @@
-package org.apache.ctakes.temporal.ae.feature;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-
-public class SurfaceFormFeatureExtractor implements SimpleFeatureExtractor {
-
-	private final String SYMBOL = "Symbol";
-	private final String SYMBOL_REG = "\\W+";
-	private final String ALL_CAPITAL = "AllCapital";
-	private final String ALL_CAPITAL_REG = "[A-Z][A-Z]+";
-	private final String FIRST_CAPITAL = "FirstCapital";
-	private final String FIRST_CAPITAL_REG = "^[A-Z][a-z]+";
-	private final String SINGLE_CAPITAL = "SingelCapital";
-	private final String SINGLE_CAPITAL_REG = "^[A-Z]{1}$";
-	private final String SINGLE_LETTER ="SingleLetter";
-	private final String SINGLE_LETTER_REG = "^[a-z]{1}$";
-	private final String ALL_LOWER = "AllLower";
-	private final String ALL_LOWER_REG = "[a-z][a-z]+";
-	private final String NUMBER = "Number";
-	private final String NUMBER_REG ="[\\d]*\\.?[\\d]+";
-	private final String WORDNUMMIX ="WordNumberMix";
-	private final String WORDNUMMIX_REG ="[\\w][\\w]+";
-	private final String FEATURE_SURF = "Surface";
-	private final String FEATURE_LENGTH = "Length";
-	
-	@Override
-	public List<Feature> extract(JCas view, Annotation focusAnnotation)
-			throws CleartkExtractorException {
-		ArrayList<Feature> features = new ArrayList<Feature>();
-		String jCasText = view.getDocumentText();
-	    int begin = focusAnnotation.getBegin();
-	    int end = focusAnnotation.getEnd();
-	    String text = jCasText == null ? null : jCasText.substring(begin, end);
-	    features.add(new Feature(this.FEATURE_SURF, getStrType(text)));
-	    int length = text == null ? 0 : text.length();
-	    if (length <=1) features.add(new Feature(this.FEATURE_LENGTH, "single"));
-	    else features.add(new Feature(this.FEATURE_LENGTH, "multiple"));
-
-	    // create a single feature from the text
-	    return features;
-	}
-	
-	public static void main(String[] args) throws Exception {
-		SurfaceFormFeatureExtractor se = new SurfaceFormFeatureExtractor();
-		String test = "a";
-		System.out.println("String type is :" + se.getStrType(test));
-	}
-
-	private String getStrType(String test) {
-		if ( test.matches(this.ALL_CAPITAL_REG)) return this.ALL_CAPITAL;
-		else if ( test.matches(ALL_LOWER_REG)) return this.ALL_LOWER;
-		else if ( test.matches(FIRST_CAPITAL_REG)) return this.FIRST_CAPITAL;
-		else if ( test.matches(NUMBER_REG)) return this.NUMBER;
-		else if ( test.matches(SINGLE_CAPITAL_REG)) return this.SINGLE_CAPITAL;
-		else if ( test.matches(SINGLE_LETTER_REG)) return this.SINGLE_LETTER;
-		else if ( test.matches(SYMBOL_REG)) return this.SYMBOL;
-		else if ( test.matches(WORDNUMMIX_REG)) return this.WORDNUMMIX;
-		else return "Nomatch";
-	}
-
-}
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+
+public class SurfaceFormFeatureExtractor implements SimpleFeatureExtractor {
+
+	private final String SYMBOL = "Symbol";
+	private final String SYMBOL_REG = "\\W+";
+	private final String ALL_CAPITAL = "AllCapital";
+	private final String ALL_CAPITAL_REG = "[A-Z][A-Z]+";
+	private final String FIRST_CAPITAL = "FirstCapital";
+	private final String FIRST_CAPITAL_REG = "^[A-Z][a-z]+";
+	private final String SINGLE_CAPITAL = "SingelCapital";
+	private final String SINGLE_CAPITAL_REG = "^[A-Z]{1}$";
+	private final String SINGLE_LETTER ="SingleLetter";
+	private final String SINGLE_LETTER_REG = "^[a-z]{1}$";
+	private final String ALL_LOWER = "AllLower";
+	private final String ALL_LOWER_REG = "[a-z][a-z]+";
+	private final String NUMBER = "Number";
+	private final String NUMBER_REG ="[\\d]*\\.?[\\d]+";
+	private final String WORDNUMMIX ="WordNumberMix";
+	private final String WORDNUMMIX_REG ="[\\w][\\w]+";
+	private final String FEATURE_SURF = "Surface";
+	private final String FEATURE_LENGTH = "Length";
+	
+	@Override
+	public List<Feature> extract(JCas view, Annotation focusAnnotation)
+			throws CleartkExtractorException {
+		ArrayList<Feature> features = new ArrayList<Feature>();
+		String jCasText = view.getDocumentText();
+	    int begin = focusAnnotation.getBegin();
+	    int end = focusAnnotation.getEnd();
+	    String text = jCasText == null ? null : jCasText.substring(begin, end);
+	    features.add(new Feature(this.FEATURE_SURF, getStrType(text)));
+	    int length = text == null ? 0 : text.length();
+	    if (length <=1) features.add(new Feature(this.FEATURE_LENGTH, "single"));
+	    else features.add(new Feature(this.FEATURE_LENGTH, "multiple"));
+
+	    // create a single feature from the text
+	    return features;
+	}
+	
+	public static void main(String[] args) throws Exception {
+		SurfaceFormFeatureExtractor se = new SurfaceFormFeatureExtractor();
+		String test = "a";
+		System.out.println("String type is :" + se.getStrType(test));
+	}
+
+	private String getStrType(String test) {
+		if ( test.matches(this.ALL_CAPITAL_REG)) return this.ALL_CAPITAL;
+		else if ( test.matches(ALL_LOWER_REG)) return this.ALL_LOWER;
+		else if ( test.matches(FIRST_CAPITAL_REG)) return this.FIRST_CAPITAL;
+		else if ( test.matches(NUMBER_REG)) return this.NUMBER;
+		else if ( test.matches(SINGLE_CAPITAL_REG)) return this.SINGLE_CAPITAL;
+		else if ( test.matches(SINGLE_LETTER_REG)) return this.SINGLE_LETTER;
+		else if ( test.matches(SYMBOL_REG)) return this.SYMBOL;
+		else if ( test.matches(WORDNUMMIX_REG)) return this.WORDNUMMIX;
+		else return "Nomatch";
+	}
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SurfaceFormFeatureExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native