You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2016/04/25 23:27:26 UTC
svn commit: r1740904 - in
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor:
ae/RelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java
Author: clin
Date: Mon Apr 25 21:27:22 2016
New Revision: 1740904
URL: http://svn.apache.org/viewvc?rev=1740904&view=rev
Log:
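Fix a relation bug that caused the location_of category count to keep increasing with each grid search option: the static category_frequency map in RelationExtractorAnnotator is now reset (via the new clearCategoryFrequency() method) before the classifier is trained and packaged.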
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1740904&r1=1740903&r2=1740904&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Mon Apr 25 21:27:22 2016
@@ -57,263 +57,266 @@ import com.google.common.collect.Lists;
public abstract class RelationExtractorAnnotator extends CleartkAnnotator<String> {
- public static final String NO_RELATION_CATEGORY = "-NONE-";
+ public static final String NO_RELATION_CATEGORY = "-NONE-";
- public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
- "ProbabilityOfKeepingANegativeExample";
-
- public static Map<String, Integer> category_frequency = new LinkedHashMap<>();
-
- @ConfigurationParameter(
- name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- mandatory = false,
- description = "probability that a negative example should be retained for training")
- protected double probabilityOfKeepingANegativeExample = 1.0;
-
- protected Random coin = new Random(0);
-
- private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors = this.getFeatureExtractors();
-
- private Class<? extends Annotation> coveringClass = getCoveringClass();
-
- /**
- * Defines the list of feature extractors used by the classifier. Subclasses
- * may override this method to provide a different set of feature extractors.
- *
- * @return The list of feature extractors to use.
- */
- protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
- return Lists.newArrayList(
- new TokenFeaturesExtractor(),
- new PartOfSpeechFeaturesExtractor(),
- new PhraseChunkingExtractor(),
- new NamedEntityFeaturesExtractor(),
- new DependencyTreeFeaturesExtractor(),
- new DependencyPathFeaturesExtractor());
- }
-
- protected Class<? extends BinaryTextRelation> getRelationClass() {
- return BinaryTextRelation.class;
- }
-
- /*
- * Defines the type of annotation that the relation exists within (sentence,
- * document, segment)
- */
- protected abstract Class<? extends Annotation> getCoveringClass();
-
- /**
- * Selects the relevant mentions/annotations within a covering annotation for
- * relation identification/extraction.
- */
- protected abstract Iterable<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
- JCas identifiedAnnotationView,
- Annotation coveringAnnotation);
-
- /**
- * Workaround for https://code.google.com/p/cleartk/issues/detail?id=346
- *
- * Not intended for external use
- */
- static void allowClassifierModelOnClasspath(UimaContext context) {
- String modelPathParam = GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH;
- String modelPath = (String) context.getConfigParameterValue(modelPathParam);
- if (modelPath != null) {
- URL modelClasspathURL = RelationExtractorAnnotator.class.getResource(modelPath);
- if (modelClasspathURL != null) {
- UimaContextAdmin contextAdmin = (UimaContextAdmin) context;
- ConfigurationManager manager = contextAdmin.getConfigurationManager();
- String qualifiedModelPathParam = contextAdmin.getQualifiedContextName() + modelPathParam;
- manager.setConfigParameterValue(qualifiedModelPathParam, modelClasspathURL.toString());
- }
- }
- }
-
- @Override
- public void initialize(UimaContext context) throws ResourceInitializationException {
- allowClassifierModelOnClasspath(context);
- super.initialize(context);
- }
-
- /*
- * Implement the standard UIMA process method.
- */
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException {
-
- // lookup from pair of annotations to binary text relation
- // note: assumes that there will be at most one relation per pair
- Map<List<Annotation>, BinaryTextRelation> relationLookup;
- relationLookup = new HashMap<>();
- if (this.isTraining()) {
- relationLookup = new HashMap<>();
- for (BinaryTextRelation relation : JCasUtil.select(jCas, this.getRelationClass())) {
- Annotation arg1 = relation.getArg1().getArgument();
- Annotation arg2 = relation.getArg2().getArgument();
- // The key is a list of args so we can do bi-directional lookup
- List<Annotation> key = Arrays.asList(arg1, arg2);
- if(relationLookup.containsKey(key)){
- String reln = relationLookup.get(key).getCategory();
- System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
- System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
- }
- relationLookup.put(key, relation);
- }
- }
-
- // walk through each sentence in the text
- for (Annotation coveringAnnotation : JCasUtil.select(jCas, coveringClass)) {
-
- // walk through the pairs of annotations
- for (IdentifiedAnnotationPair pair : this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation)) {
- IdentifiedAnnotation arg1 = pair.getArg1();
- IdentifiedAnnotation arg2 = pair.getArg2();
- // apply all the feature extractors to extract the list of features
- List<Feature> features = new ArrayList<>();
- for (RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> extractor : this.featureExtractors) {
- List<Feature> feats = extractor.extract(jCas, arg1, arg2);
- if (feats != null) features.addAll(feats);
- }
-
- // sanity check on feature values
- for (Feature feature : features) {
- if (feature.getValue() == null) {
- feature.setValue("NULL");
- String message = String.format("Null value found in %s", feature);
- System.err.println(message);
-// throw new IllegalArgumentException(String.format(message, feature, features));
- }
- }
-
- // during training, feed the features to the data writer
- if (this.isTraining()) {
- String category = this.getRelationCategory(relationLookup, arg1, arg2);
- if (category == null) {
- continue;
- }
-
- //populate category_frequency count:
- if(category_frequency.containsKey(category)){
- category_frequency.put(category, category_frequency.get(category)+1);
- }else{
- category_frequency.put(category, 1);
- }
-
- // create a classification instance and write it to the training data
- this.dataWriter.write(new Instance<>(category, features));
- }
-
- // during classification feed the features to the classifier and create
- // annotations
- else {
- String predictedCategory = this.classify(features);
-
- // add a relation annotation if a true relation was predicted
- if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
-
- // if we predict an inverted relation, reverse the order of the
- // arguments
- if (predictedCategory.endsWith("-1")) {
- predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
- IdentifiedAnnotation temp = arg1;
- arg1 = arg2;
- arg2 = temp;
- }
-
- createRelation(jCas, arg1, arg2, predictedCategory);
- }
- }
- } // end pair in pairs
- } // end for(Sentence)
- }
-
- /**
- * Looks up the arguments in the specified lookup table and converts the
- * relation into a label for classification
- *
- * @return If this category should not be processed for training return
- * <i>null</i> otherwise it returns the label sent to the datawriter
- */
- protected String getRelationCategory(
- Map<List<Annotation>, BinaryTextRelation> relationLookup,
- IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2) {
- BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
- String category;
- if (relation != null) {
- category = relation.getCategory();
- } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
- category = NO_RELATION_CATEGORY;
- } else {
- category = null;
- }
- return category;
- }
-
- /**
- * Predict an outcome given a set of features. By default, this simply
- * delegates to the object's <code>classifier</code>. Subclasses may override
- * this method to implement more complex classification procedures.
- *
- * @param features
- * The features to be classified.
- * @return The predicted outcome (label) for the features.
- */
- protected String classify(List<Feature> features) throws CleartkProcessingException {
- return this.classifier.classify(features);
- }
-
- /**
- * Create a UIMA relation type based on arguments and the relation label. This
- * allows subclasses to create/define their own types: e.g. coreference can
- * create CoreferenceRelation instead of BinaryTextRelation
- *
- * @param jCas
- * - JCas object, needed to create new UIMA types
- * @param arg1
- * - First argument to relation
- * @param arg2
- * - Second argument to relation
- * @param predictedCategory
- * - Name of relation
- */
- protected void createRelation(
- JCas jCas,
- IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2,
- String predictedCategory) {
- // add the relation to the CAS
- RelationArgument relArg1 = new RelationArgument(jCas);
- relArg1.setArgument(arg1);
- relArg1.setRole("Argument");
- relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(jCas);
- relArg2.setArgument(arg2);
- relArg2.setRole("Related_to");
- relArg2.addToIndexes();
- BinaryTextRelation relation = new BinaryTextRelation(jCas);
- relation.setArg1(relArg1);
- relation.setArg2(relArg2);
- relation.setCategory(predictedCategory);
- relation.addToIndexes();
- }
-
- public static class IdentifiedAnnotationPair {
-
- private final IdentifiedAnnotation arg1;
- private final IdentifiedAnnotation arg2;
-
- public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
- this.arg1 = arg1;
- this.arg2 = arg2;
- }
-
- public final IdentifiedAnnotation getArg1() {
- return arg1;
- }
-
- public final IdentifiedAnnotation getArg2() {
- return arg2;
- }
- }
+ public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
+ "ProbabilityOfKeepingANegativeExample";
+
+ public static Map<String, Integer> category_frequency = new LinkedHashMap<>();
+
+ @ConfigurationParameter(
+ name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ mandatory = false,
+ description = "probability that a negative example should be retained for training")
+ protected double probabilityOfKeepingANegativeExample = 1.0;
+
+ protected Random coin = new Random(0);
+
+ private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors = this.getFeatureExtractors();
+
+ private Class<? extends Annotation> coveringClass = getCoveringClass();
+
+ /**
+ * Defines the list of feature extractors used by the classifier. Subclasses
+ * may override this method to provide a different set of feature extractors.
+ *
+ * @return The list of feature extractors to use.
+ */
+ protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+ return Lists.newArrayList(
+ new TokenFeaturesExtractor(),
+ new PartOfSpeechFeaturesExtractor(),
+ new PhraseChunkingExtractor(),
+ new NamedEntityFeaturesExtractor(),
+ new DependencyTreeFeaturesExtractor(),
+ new DependencyPathFeaturesExtractor());
+ }
+
+ protected Class<? extends BinaryTextRelation> getRelationClass() {
+ return BinaryTextRelation.class;
+ }
+
+ /*
+ * Defines the type of annotation that the relation exists within (sentence,
+ * document, segment)
+ */
+ protected abstract Class<? extends Annotation> getCoveringClass();
+
+ /**
+ * Selects the relevant mentions/annotations within a covering annotation for
+ * relation identification/extraction.
+ */
+ protected abstract Iterable<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+ JCas identifiedAnnotationView,
+ Annotation coveringAnnotation);
+
+ /**
+ * Workaround for https://code.google.com/p/cleartk/issues/detail?id=346
+ *
+ * Not intended for external use
+ */
+ static void allowClassifierModelOnClasspath(UimaContext context) {
+ String modelPathParam = GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH;
+ String modelPath = (String) context.getConfigParameterValue(modelPathParam);
+ if (modelPath != null) {
+ URL modelClasspathURL = RelationExtractorAnnotator.class.getResource(modelPath);
+ if (modelClasspathURL != null) {
+ UimaContextAdmin contextAdmin = (UimaContextAdmin) context;
+ ConfigurationManager manager = contextAdmin.getConfigurationManager();
+ String qualifiedModelPathParam = contextAdmin.getQualifiedContextName() + modelPathParam;
+ manager.setConfigParameterValue(qualifiedModelPathParam, modelClasspathURL.toString());
+ }
+ }
+ }
+
+ @Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ allowClassifierModelOnClasspath(context);
+ super.initialize(context);
+ }
+
+ /*
+ * Implement the standard UIMA process method.
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ // lookup from pair of annotations to binary text relation
+ // note: assumes that there will be at most one relation per pair
+ Map<List<Annotation>, BinaryTextRelation> relationLookup;
+ relationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ relationLookup = new HashMap<>();
+ for (BinaryTextRelation relation : JCasUtil.select(jCas, this.getRelationClass())) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ List<Annotation> key = Arrays.asList(arg1, arg2);
+ if(relationLookup.containsKey(key)){
+ String reln = relationLookup.get(key).getCategory();
+ System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+ System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+ }
+ relationLookup.put(key, relation);
+ }
+ }
+
+ // walk through each sentence in the text
+ for (Annotation coveringAnnotation : JCasUtil.select(jCas, coveringClass)) {
+
+ // walk through the pairs of annotations
+ for (IdentifiedAnnotationPair pair : this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation)) {
+ IdentifiedAnnotation arg1 = pair.getArg1();
+ IdentifiedAnnotation arg2 = pair.getArg2();
+ // apply all the feature extractors to extract the list of features
+ List<Feature> features = new ArrayList<>();
+ for (RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> extractor : this.featureExtractors) {
+ List<Feature> feats = extractor.extract(jCas, arg1, arg2);
+ if (feats != null) features.addAll(feats);
+ }
+
+ // sanity check on feature values
+ for (Feature feature : features) {
+ if (feature.getValue() == null) {
+ feature.setValue("NULL");
+ String message = String.format("Null value found in %s", feature);
+ System.err.println(message);
+ // throw new IllegalArgumentException(String.format(message, feature, features));
+ }
+ }
+
+ // during training, feed the features to the data writer
+ if (this.isTraining()) {
+ String category = this.getRelationCategory(relationLookup, arg1, arg2);
+ if (category == null) {
+ continue;
+ }
+
+ //populate category_frequency count:
+ if(category_frequency.containsKey(category)){
+ category_frequency.put(category, category_frequency.get(category)+1);
+ }else{
+ category_frequency.put(category, 1);
+ }
+
+ // create a classification instance and write it to the training data
+ this.dataWriter.write(new Instance<>(category, features));
+ }
+
+ // during classification feed the features to the classifier and create
+ // annotations
+ else {
+ String predictedCategory = this.classify(features);
+
+ // add a relation annotation if a true relation was predicted
+ if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+ // if we predict an inverted relation, reverse the order of the
+ // arguments
+ if (predictedCategory.endsWith("-1")) {
+ predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+ IdentifiedAnnotation temp = arg1;
+ arg1 = arg2;
+ arg2 = temp;
+ }
+
+ createRelation(jCas, arg1, arg2, predictedCategory);
+ }
+ }
+ } // end pair in pairs
+ } // end for(Sentence)
+ }
+
+ /**
+ * Looks up the arguments in the specified lookup table and converts the
+ * relation into a label for classification
+ *
+ * @return If this category should not be processed for training return
+ * <i>null</i> otherwise it returns the label sent to the datawriter
+ */
+ protected String getRelationCategory(
+ Map<List<Annotation>, BinaryTextRelation> relationLookup,
+ IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) {
+ BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+ String category;
+ if (relation != null) {
+ category = relation.getCategory();
+ } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+ category = NO_RELATION_CATEGORY;
+ } else {
+ category = null;
+ }
+ return category;
+ }
+
+ /**
+ * Predict an outcome given a set of features. By default, this simply
+ * delegates to the object's <code>classifier</code>. Subclasses may override
+ * this method to implement more complex classification procedures.
+ *
+ * @param features
+ * The features to be classified.
+ * @return The predicted outcome (label) for the features.
+ */
+ protected String classify(List<Feature> features) throws CleartkProcessingException {
+ return this.classifier.classify(features);
+ }
+
+ /**
+ * Create a UIMA relation type based on arguments and the relation label. This
+ * allows subclasses to create/define their own types: e.g. coreference can
+ * create CoreferenceRelation instead of BinaryTextRelation
+ *
+ * @param jCas
+ * - JCas object, needed to create new UIMA types
+ * @param arg1
+ * - First argument to relation
+ * @param arg2
+ * - Second argument to relation
+ * @param predictedCategory
+ * - Name of relation
+ */
+ protected void createRelation(
+ JCas jCas,
+ IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2,
+ String predictedCategory) {
+ // add the relation to the CAS
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(arg1);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(arg2);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ BinaryTextRelation relation = new BinaryTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(predictedCategory);
+ relation.addToIndexes();
+ }
+
+ public static class IdentifiedAnnotationPair {
+
+ private final IdentifiedAnnotation arg1;
+ private final IdentifiedAnnotation arg2;
+
+ public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+ this.arg1 = arg1;
+ this.arg2 = arg2;
+ }
+
+ public final IdentifiedAnnotation getArg1() {
+ return arg1;
+ }
+
+ public final IdentifiedAnnotation getArg2() {
+ return arg2;
+ }
+ }
+
+ public static void clearCategoryFrequency() {
+ category_frequency = new LinkedHashMap<>();
+ }
}
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1740904&r1=1740903&r2=1740904&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Mon Apr 25 21:27:22 2016
@@ -374,6 +374,8 @@ public class RelationExtractorEvaluation
List<String> parameters = new LinkedList<>(Arrays.asList(this.parameterSettings.trainingArguments));
List<String> additional = Arrays.asList(weightArray);
parameters.addAll(additional);
+
+ RelationExtractorAnnotator.clearCategoryFrequency();
// train the classifier and package it into a .jar file
JarClassifierBuilder.trainAndPackage(directory, parameters.toArray(new String[parameters.size()]));