You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/01/07 23:49:57 UTC
svn commit: r1430073 [1/7] - in /incubator/ctakes/trunk/ctakes-coreference:
./ .settings/ desc/ desc/analysis_engine/ desc/cas_consumer/
src/main/java/org/apache/ctakes/coreference/ae/
src/main/java/org/apache/ctakes/coreference/cc/ src/main/java/org/a...
Author: tmill
Date: Mon Jan 7 22:49:52 2013
New Revision: 1430073
URL: http://svn.apache.org/viewvc?rev=1430073&view=rev
Log:
ctakes-117: Adds features for doing mention similarity using statistics from wiki articles.
Additional features multiply other features together in meaningful ways:
(shared cui X entity type), e.g., two mentions of "liver" are probably the same because
a person only has one liver, whereas two mentions of "pain" are less likely to be the same.
This feature captures that intuition by having type-specific synonym features.
Added:
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdt (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdx (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fnm (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.frq (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.nrm (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.prx (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.tii (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.tis (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.tvd (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.tvf (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.tvx (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/segments.gen (with props)
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/segments_1 (with props)
Modified:
incubator/ctakes/trunk/ctakes-coreference/.classpath
incubator/ctakes/trunk/ctakes-coreference/.settings/org.eclipse.core.resources.prefs
incubator/ctakes/trunk/ctakes-coreference/NOTICE
incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableCreator.xml
incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableExpander.xml
incubator/ctakes/trunk/ctakes-coreference/desc/MipacqSvmChainCreator.xml
incubator/ctakes/trunk/ctakes-coreference/desc/analysis_engine/ODIESvmVectorCreator.xml
incubator/ctakes/trunk/ctakes-coreference/desc/cas_consumer/ODIEVectorFileWriterCasConsumer.xml
incubator/ctakes/trunk/ctakes-coreference/pom.xml
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/FeatureVector.java
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/PairAttributeCalculator.java
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SvmVectorCreator.java
incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SyntaxAttributeCalculator.java
incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/ne.mayo.rbf.model
Modified: incubator/ctakes/trunk/ctakes-coreference/.classpath
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/.classpath?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/.classpath (original)
+++ incubator/ctakes/trunk/ctakes-coreference/.classpath Mon Jan 7 22:49:52 2013
@@ -7,18 +7,18 @@
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/UIMA"/>
- <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
+ <classpathentry kind="src" output="target/classes" path="target/generated-sources/jcasgen">
<attributes>
+ <attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
- <classpathentry kind="src" output="target/test-classes" path="src/test/java">
+ <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
- <attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
- <classpathentry kind="src" output="target/classes" path="target/generated-sources/jcasgen">
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
Modified: incubator/ctakes/trunk/ctakes-coreference/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/.settings/org.eclipse.core.resources.prefs?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/.settings/org.eclipse.core.resources.prefs (original)
+++ incubator/ctakes/trunk/ctakes-coreference/.settings/org.eclipse.core.resources.prefs Mon Jan 7 22:49:52 2013
@@ -1,5 +1,6 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
encoding//target/generated-sources/jcasgen=UTF-8
encoding/<project>=UTF-8
Modified: incubator/ctakes/trunk/ctakes-coreference/NOTICE
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/NOTICE?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/NOTICE (original)
+++ incubator/ctakes/trunk/ctakes-coreference/NOTICE Mon Jan 7 22:49:52 2013
@@ -1,5 +1,5 @@
==============================================================================================
- Copyright to Children's Hostpital Boston
+ Copyright to Children's Hospital Boston
==============================================================================================
This product includes software (OpenAI_FSM.jar) developed by OpenAi Labs.
Modified: incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableCreator.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableCreator.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableCreator.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableCreator.xml Mon Jan 7 22:49:52 2013
@@ -101,7 +101,7 @@
</configurationParameterSettings>
<typeSystemDescription>
<imports>
- <import location="type-system/CorefTypes.xml"/>
+ <import name="org.apache.ctakes.coreference.types.TypeSystem"/>
<import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
</imports>
</typeSystemDescription>
Modified: incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableExpander.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableExpander.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableExpander.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/desc/MipacqMarkableExpander.xml Mon Jan 7 22:49:52 2013
@@ -33,7 +33,7 @@
<typeSystemDescription>
<imports>
<import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
- <import location="type-system/CorefTypes.xml"/>
+ <import name="org.apache.ctakes.coreference.types.TypeSystem"/>
</imports>
</typeSystemDescription>
<typePriorities/>
Modified: incubator/ctakes/trunk/ctakes-coreference/desc/MipacqSvmChainCreator.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/desc/MipacqSvmChainCreator.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/desc/MipacqSvmChainCreator.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/desc/MipacqSvmChainCreator.xml Mon Jan 7 22:49:52 2013
@@ -32,7 +32,7 @@
<configurationParameterSettings/>
<typeSystemDescription>
<imports>
- <import location="type-system/CorefTypes.xml"/>
+ <import name="org.apache.ctakes.coreference.types.TypeSystem"/>
</imports>
</typeSystemDescription>
<typePriorities/>
Modified: incubator/ctakes/trunk/ctakes-coreference/desc/analysis_engine/ODIESvmVectorCreator.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/desc/analysis_engine/ODIESvmVectorCreator.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/desc/analysis_engine/ODIESvmVectorCreator.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/desc/analysis_engine/ODIESvmVectorCreator.xml Mon Jan 7 22:49:52 2013
@@ -24,7 +24,7 @@
<primitive>false</primitive>
<delegateAnalysisEngineSpecifiers>
<delegateAnalysisEngine key="ConstituencyParserAnnotator">
- <import location="../../../Constituency Parser/desc/ConstituencyParserAnnotator.xml"/>
+ <import location="../../../ctakes-constituency-parser/desc/ConstituencyParserAnnotator.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="TokenizerAnnotator">
<import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
@@ -47,9 +47,6 @@
<delegateAnalysisEngine key="SentenceDetectorAnnotator">
<import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
</delegateAnalysisEngine>
- <delegateAnalysisEngine key="ODIEVectorFileWriterCasConsumer">
- <import location="../cas_consumer/ODIEVectorFileWriterCasConsumer.xml"/>
- </delegateAnalysisEngine>
<delegateAnalysisEngine key="SimpleSegmentAnnotator">
<import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/SimpleSegmentAnnotator.xml"/>
</delegateAnalysisEngine>
@@ -97,74 +94,12 @@
<parameter>Chunker/ChunkCreatorClass</parameter>
</overrides>
</configurationParameter>
- <configurationParameter>
- <name>outputDir</name>
- <description>Where the files will be written to</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- <overrides>
- <parameter>ODIEVectorFileWriterCasConsumer/outputDir</parameter>
- </overrides>
- </configurationParameter>
- <configurationParameter>
- <name>goldStandardDir</name>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- <overrides>
- <parameter>ODIEVectorFileWriterCasConsumer/goldStandardDir</parameter>
- </overrides>
- </configurationParameter>
- <configurationParameter>
- <name>writeVectors</name>
- <description>Should the consumer print out the vector pairs for the coreferent markables?</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- <overrides>
- <parameter>ODIEVectorFileWriterCasConsumer/writeVectors</parameter>
- </overrides>
- </configurationParameter>
- <configurationParameter>
- <name>writeTrees</name>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- <overrides>
- <parameter>ODIEVectorFileWriterCasConsumer/writeTrees</parameter>
- </overrides>
- </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
<name>ChunkCreatorClass</name>
<value>
- <string>edu.mayo.bmi.uima.chunker.PhraseTypeChunkCreator</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>outputDir</name>
- <value>
- <string>change me</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>goldStandardDir</name>
- <value>
- <string>chang me</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>writeVectors</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>writeTrees</name>
- <value>
- <boolean>false</boolean>
+ <string>org.apache.ctakes.chunker.ae.PhraseTypeChunkCreator</string>
</value>
</nameValuePair>
</configurationParameterSettings>
@@ -185,7 +120,6 @@
<node>MipacqMarkableCreator</node>
<node>MipacqMarkableExpander</node>
<node>MipacqMarkablePairGenerator</node>
- <node>ODIEVectorFileWriterCasConsumer</node>
</fixedFlow>
</flowConstraints>
<typePriorities>
Modified: incubator/ctakes/trunk/ctakes-coreference/desc/cas_consumer/ODIEVectorFileWriterCasConsumer.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/desc/cas_consumer/ODIEVectorFileWriterCasConsumer.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/desc/cas_consumer/ODIEVectorFileWriterCasConsumer.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/desc/cas_consumer/ODIEVectorFileWriterCasConsumer.xml Mon Jan 7 22:49:52 2013
@@ -139,7 +139,7 @@
<name>stopWordsFile</name>
<description/>
<fileResourceSpecifier>
- <fileUrl>file:stop.txt</fileUrl>
+ <fileUrl>file:org/apache/ctakes/coreference/models/stop.txt</fileUrl>
</fileResourceSpecifier>
<implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
</externalResource>
@@ -147,7 +147,7 @@
<name>treeFragFile</name>
<description/>
<fileResourceSpecifier>
- <fileUrl>file:frags.txt</fileUrl>
+ <fileUrl>file:org/apache/ctakes/coreference/models/frags.txt</fileUrl>
</fileResourceSpecifier>
<implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
</externalResource>
Modified: incubator/ctakes/trunk/ctakes-coreference/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/pom.xml?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/pom.xml (original)
+++ incubator/ctakes/trunk/ctakes-coreference/pom.xml Mon Jan 7 22:49:52 2013
@@ -62,6 +62,31 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-relation-extractor</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-dictionary-lookup</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.ctakesresources</groupId>
+ <artifactId>ctakes-resources</artifactId>
+ <version>3.1.0</version>
+ <type>pom</type>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.ctakesresources</groupId>
+ <artifactId>ctakes-resources-umls2011ab</artifactId>
+ <version>3.1.0</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.ctakesresources</groupId>
+ <artifactId>ctakes-resources-distribution</artifactId>
+ <version>3.1.0</version>
+ <type>pom</type>
+ </dependency>
</dependencies>
<build>
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java Mon Jan 7 22:49:52 2013
@@ -18,8 +18,6 @@
*/
package org.apache.ctakes.coreference.ae;
-import java.io.BufferedReader;
-import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -32,38 +30,37 @@ import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.EmptyFSList;
-import org.apache.uima.jcas.cas.FSList;
-import org.apache.uima.jcas.cas.NonEmptyFSList;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.ctakes.coreference.type.BooleanLabeledFS;
-
-
import org.apache.ctakes.core.resource.FileResource;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.coreference.type.BooleanLabeledFS;
+import org.apache.ctakes.coreference.type.DemMarkable;
+import org.apache.ctakes.coreference.type.Markable;
+import org.apache.ctakes.coreference.type.MarkablePairSet;
+import org.apache.ctakes.coreference.type.NEMarkable;
+import org.apache.ctakes.coreference.type.PronounMarkable;
import org.apache.ctakes.coreference.util.AbstractClassifier;
import org.apache.ctakes.coreference.util.CorefConsts;
import org.apache.ctakes.coreference.util.FSIteratorToList;
import org.apache.ctakes.coreference.util.FeatureVector;
import org.apache.ctakes.coreference.util.MarkableTreeUtils;
-import org.apache.ctakes.coreference.util.ParentPtrTree;
+import org.apache.ctakes.coreference.util.SvmVectorCreator;
import org.apache.ctakes.coreference.util.SyntaxAttributeCalculator;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.coreference.type.DemMarkable;
-import org.apache.ctakes.coreference.type.Markable;
-import org.apache.ctakes.coreference.type.MarkablePairSet;
-import org.apache.ctakes.coreference.type.NEMarkable;
-import org.apache.ctakes.coreference.type.PronounMarkable;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.EmptyFSList;
+import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
public class MipacqSvmChainCreator extends JCasAnnotator_ImplBase {
@@ -74,18 +71,21 @@ public class MipacqSvmChainCreator exten
private boolean debug = false;
// svm models
- private AbstractClassifier mod_pron, mod_dem, mod_coref;
- private svm_model mod_anaphoricity;
+// private AbstractClassifier mod_pron, mod_dem, mod_coref;
+ private AbstractClassifier mod_coref;
+// private AbstractClassifier mod_pron;
+// private svm_model mod_anaphoricity;
// positive class label index in libsvm's prob_est array
- private int anaphoricity_idx, coref_idx;
- private org.apache.ctakes.coreference.util.SvmVectorCreator vecCreator = null;
+// private int anaphoricity_idx
+// private int coref_idx;
+ private SvmVectorCreator vecCreator = null;
- ParentPtrTree ppt;
+// ParentPtrTree ppt;
HashSet<String> stopwords;
private ArrayList<String> treeFrags;
- private svm_model loadModel (UimaContext uc, String m) {
+/* private svm_model loadModel (UimaContext uc, String m) {
svm_model ret = null;
try {
String r = ((FileResource) uc.getResourceObject(m)).getFile().getAbsolutePath();
@@ -97,21 +97,21 @@ public class MipacqSvmChainCreator exten
}
return ret;
}
-
+*/
@Override
public void initialize(UimaContext uc) throws ResourceInitializationException {
super.initialize(uc);
// Load svm models
- mod_anaphoricity = loadModel(uc, "svmAnaphoricityModel");
+// mod_anaphoricity = loadModel(uc, "svmAnaphoricityModel");
// FIXME why is there a minus one here?
- mod_pron = new AbstractClassifier(uc, "svmPronModel", FeatureVector.getPronCorefFeatures().length + SyntaxAttributeCalculator.getNumPronFeats() - 1);
+// mod_pron = new AbstractClassifier(uc, "svmPronModel", FeatureVector.getPronCorefFeatures().length + SyntaxAttributeCalculator.getNumPronFeats() - 1);
// mod_dem = new AbstractClassifier(uc, "svmDemModel", FeatureVector.getDemCorefFeatures().length + SyntaxAttributeCalculator.getNumDemFeats() - 1);
mod_coref = new AbstractClassifier(uc, "svmCorefModel", FeatureVector.getNECorefFeatures().length + SyntaxAttributeCalculator.getNumNEFeats() - 1);
- int[] labels = new int[2];
- svm.svm_get_labels(mod_anaphoricity, labels);
- anaphoricity_idx = labels[0]==1 ? 0 : 1;
+// int[] labels = new int[2];
+// svm.svm_get_labels(mod_anaphoricity, labels);
+// anaphoricity_idx = labels[0]==1 ? 0 : 1;
// svm.svm_get_labels(mod_coref, labels);
// coref_idx = labels[0]==1 ? 0 : 1;
@@ -130,7 +130,8 @@ public class MipacqSvmChainCreator exten
else if (i < 0)
stopwords.add(l.trim());
}
- vecCreator = new org.apache.ctakes.coreference.util.SvmVectorCreator(stopwords, mod_anaphoricity);
+ logger.info("Stop words list loaded: " + r.getFile().getAbsolutePath());
+ vecCreator = new SvmVectorCreator(stopwords);
treeFrags = new ArrayList<String>();
r = (FileResource) uc.getResourceObject("frags");
@@ -142,7 +143,7 @@ public class MipacqSvmChainCreator exten
}
vecCreator.setFrags(treeFrags);
}
- logger.info("Stop words list loaded: " + r.getFile().getAbsolutePath());
+ logger.info("Tree fragment features loaded: " + r.getFile().getAbsolutePath());
} catch (Exception e) {
e.printStackTrace();
logger.error("Error loading stop words list");
@@ -154,16 +155,23 @@ public class MipacqSvmChainCreator exten
// Convert the orderless FSIterator to List, sort by char offsets
LinkedList<Annotation> lm = FSIteratorToList.convert(
jcas.getJFSIndexRepository().getAnnotationIndex(Markable.type).iterator());
-
+ Map<Markable, NonEmptyFSList> collectionRas = new HashMap<Markable, NonEmptyFSList>();
+ String docName = DocumentIDAnnotationUtil.getDocumentID(jcas);
+ logger.info("Classifying coreference in document: " + docName);
+// ArrayList<CollectionTextRelation> chains = new ArrayList<CollectionTextRelation>();
+ int chainId = 0;
+
+ EmptyFSList emptyList = new EmptyFSList(jcas);
// Create a parent pointer tree to calculate equivalence classes
- ppt = new ParentPtrTree(lm.size());
+// ppt = new ParentPtrTree(lm.size());
// Make a data structure mapping markables to indexes so we don't lose the order if we re-arrange
Map<Markable, Integer> m2q = new HashMap<Markable,Integer>();
+
for(int p = 0; p < lm.size(); p++){
m2q.put((Markable)lm.get(p), p);
}
-
+
FSIterator<Annotation> iter = jcas.getAnnotationIndex(MarkablePairSet.type).iterator();
while(iter.hasNext()){
MarkablePairSet set = (MarkablePairSet) iter.next();
@@ -172,15 +180,23 @@ public class MipacqSvmChainCreator exten
MarkableProb bestAnte = null;
LinkedList<Markable> ll = fs2ll(fs);
if(anaphor instanceof PronounMarkable){
- bestAnte = processPronoun(anaphor, ll, jcas);
+ // There is not enough training data to do this reliably... the
+ // classifier for this type will decrease scores
+// bestAnte = processPronoun(anaphor, ll, jcas);
+ bestAnte = new MarkableProb(null, 0.0);
}else if(anaphor instanceof NEMarkable){
bestAnte = processNE(anaphor, ll, jcas);
}else if(anaphor instanceof DemMarkable){
bestAnte = processDem(anaphor, ll, jcas);
+ }else{
+ // should not happenn...
+ continue;
}
if(bestAnte.prob > CorefConsts.COREF_THRESHOLD){
+ // create the coref relation type
CoreferenceRelation cr = new CoreferenceRelation(jcas);
+ cr.setCategory("Coreference");
RelationArgument ra1 = new RelationArgument(jcas);
ra1.setId(bestAnte.m.getId());
ra1.setArgument(bestAnte.m.getContent());
@@ -195,42 +211,92 @@ public class MipacqSvmChainCreator exten
ra1.addToIndexes();
ra2.addToIndexes();
cr.addToIndexes();
- ppt.union(m2q.get(anaphor), m2q.get(bestAnte.m));
+
+ // propagate the collection relation type
+ RelationArgument anaRa = new RelationArgument(jcas);
+ anaRa.setId(anaphor.getId());
+ anaRa.setArgument(anaphor.getContent());
+ anaRa.setRole("mention");
+ NonEmptyFSList node = new NonEmptyFSList(jcas);
+ node.setHead(anaRa);
+ node.setTail(emptyList);
+ collectionRas.put(anaphor, node);
+ NonEmptyFSList anteNode = null;
+ if(collectionRas.containsKey(bestAnte.m)){
+ anteNode = collectionRas.get(bestAnte.m);
+ // find the end of the chain of this node
+ while(anteNode.getTail() instanceof NonEmptyFSList){
+ anteNode = (NonEmptyFSList) anteNode.getTail();
+ }
+ }else{
+ RelationArgument anteRa = new RelationArgument(jcas);
+ anteRa.setId(bestAnte.m.getId());
+ anteRa.setArgument(bestAnte.m.getContent());
+ anteRa.setRole("mention");
+
+ anteNode = new NonEmptyFSList(jcas);
+ anteNode.setHead(anteRa);
+ collectionRas.put(bestAnte.m, anteNode);
+ CollectionTextRelation chain = new CollectionTextRelation(jcas);
+ chain.setId(chainId++);
+ chain.setCategory("CoreferenceChain");
+ chain.setMembers(anteNode);
+ chain.addToIndexes();
+ }
+ anteNode.setTail(node);
+
+
+// ppt.union(m2q.get(anaphor), m2q.get(bestAnte.m));
if(anaphor instanceof PronounMarkable){
// if the anaphor is a pronoun then it won't be in the cas as an identifiedannotation so we need to add it.
IdentifiedAnnotation ia = new IdentifiedAnnotation(jcas);
-
+ // TODO
}
}else{
// indexNegativeExample(jcas, bestAnte.m, anaphor, bestAnte.prob);
}
}
+ logger.info("Done classifying document: " + docName);
- // Extract equivalence classes and save them into CAS
- int[] ec = new int[ppt.getSize()]; // class number for each Markable
- int n = ppt.equivCls(ec); // n holds the number of classes
- EmptyFSList elist = new EmptyFSList(jcas); // shared tail for all chains
- FSList[] listhds = new FSList[n]; // keep track of the heads of all chains
- CollectionTextRelation[] chains = new CollectionTextRelation[n];
-
- // Initialize n chains
- for (int i = 0; i < n; ++i) {
- chains[i] = new CollectionTextRelation(jcas);
- chains[i].setId(i);
- chains[i].setCategory("CoreferenceChain");
- chains[i].addToIndexes();
- listhds[i] = elist;
- }
-
- // Scan from the end of the Markable list
- // insert Markables to the head of their chains
- for (int i = ec.length-1; i >= 0; --i) {
- NonEmptyFSList l = new NonEmptyFSList(jcas);
- l.setHead(lm.get(i));
- l.setTail(listhds[ec[i]]);
- listhds[ec[i]] = l;
- chains[ec[i]].setMembers(l);
- }
+// // Extract equivalence classes and save them into CAS
+// int[] ec = new int[ppt.getSize()]; // class number for each Markable
+// int n = ppt.equivCls(ec); // n holds the number of classes
+// EmptyFSList elist = new EmptyFSList(jcas); // shared tail for all chains
+// FSList[] listhds = new FSList[n]; // keep track of the heads of all chains
+// CollectionTextRelation[] chains = new CollectionTextRelation[n];
+
+// // Initialize n chains
+// for (int i = 0; i < n; ++i) {
+// chains[i] = null; //new CollectionTextRelation(jcas);
+//// chains[i].setId(i);
+//// chains[i].setCategory("CoreferenceChain");
+//// chains[i].addToIndexes();
+// listhds[i] = elist;
+// }
+
+// // Scan from the end of the Markable list
+// // insert Markables to the head of their chains
+// for (int i = ec.length-1; i >= 0; --i) {
+// if(m2ra.containsKey(lm.get(i))){
+// NonEmptyFSList l = new NonEmptyFSList(jcas);
+// l.setHead(m2ra.get(lm.get(i)));
+// l.setTail(listhds[ec[i]]);
+// listhds[ec[i]] = l;
+// if(chains[ec[i]] == null){
+// chains[ec[i]] = new CollectionTextRelation(jcas);
+// }
+// chains[ec[i]].setMembers(l);
+// }
+// }
+
+// int j = 0;
+// for(int i = 0; i < n; i++){
+// if(chains[i] != null){
+// chains[i].setId(j++);
+// chains[i].setCategory("CoreferenceChain");
+// chains[i].addToIndexes();
+// }
+// }
}
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java Mon Jan 7 22:49:52 2013
@@ -22,69 +22,56 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
-import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.HashSet;
-import java.util.Hashtable;
import java.util.LinkedList;
import java.util.Scanner;
-import java.util.Vector;
-
-import org.apache.log4j.Logger;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSList;
-import org.apache.uima.jcas.cas.NonEmptyFSList;
-import org.apache.uima.jcas.cas.EmptyFSList;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.ProcessTrace;
-import org.apache.ctakes.coreference.type.BooleanLabeledFS;
+import libsvm.svm_node;
import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
import org.apache.ctakes.constituency.parser.util.TreeUtils;
import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.coreference.eval.helpers.Span;
-import org.apache.ctakes.coreference.eval.helpers.SpanAlignment;
-import org.apache.ctakes.coreference.eval.helpers.SpanOffsetComparator;
+import org.apache.ctakes.coreference.type.BooleanLabeledFS;
+import org.apache.ctakes.coreference.type.DemMarkable;
+import org.apache.ctakes.coreference.type.Markable;
+import org.apache.ctakes.coreference.type.MarkablePairSet;
+import org.apache.ctakes.coreference.type.NEMarkable;
import org.apache.ctakes.coreference.util.CorefConsts;
import org.apache.ctakes.coreference.util.FSIteratorToList;
import org.apache.ctakes.coreference.util.GoldStandardLabeler;
import org.apache.ctakes.coreference.util.MarkableTreeUtils;
import org.apache.ctakes.coreference.util.PairAttributeCalculator;
-import org.apache.ctakes.coreference.util.ParentPtrTree;
-import org.apache.ctakes.coreference.util.SvmUtils;
import org.apache.ctakes.coreference.util.SvmVectorCreator;
+import org.apache.ctakes.relationextractor.eval.XMIReader;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.utils.tree.SimpleTree;
-import org.apache.ctakes.coreference.type.MarkablePairSet;
-import org.apache.ctakes.coreference.type.Markable;
-import org.apache.ctakes.coreference.type.DemMarkable;
-import org.apache.ctakes.coreference.type.NEMarkable;
-import org.apache.ctakes.coreference.type.PronounMarkable;
-
-import libsvm.svm;
-import libsvm.svm_model;
-import libsvm.svm_node;
-import libsvm.svm_parameter;
-import libsvm.svm_problem;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
-public class ODIEVectorFileWriter extends CasConsumer_ImplBase {
+public class ODIEVectorFileWriter extends JCasAnnotator_ImplBase {
private Logger log = Logger.getLogger(this.getClass());
- private static final Integer NGRAM_THRESHOLD = 0;
+// private static final Integer NGRAM_THRESHOLD = 0;
private String outputDir = null;
private String goldStandardDir = null;
- private PrintWriter anaphOut = null;
+// private PrintWriter anaphOut = null;
private PrintWriter neOut = null;
private PrintWriter pronOut = null;
private PrintWriter demOut = null;
@@ -102,8 +89,8 @@ public class ODIEVectorFileWriter extend
private int posAnaphInst = 0;
private int negAnaphInst = 0;
// private svm_problem anaphProb = null;
- private ArrayList<Integer> anaphLabels = new ArrayList<Integer>();
- private ArrayList<svm_node[]> anaphNodes = new ArrayList<svm_node[]>();
+// private ArrayList<Integer> anaphLabels = new ArrayList<Integer>();
+// private ArrayList<svm_node[]> anaphNodes = new ArrayList<svm_node[]>();
// private ArrayList<Integer> corefLabels = new ArrayList<Integer>();
// private ArrayList<svm_node[]> corefNodes = new ArrayList<svm_node[]>();
// private ArrayList<TopTreebankNode> corefPathTrees = new ArrayList<TopTreebankNode>();
@@ -126,18 +113,26 @@ public class ODIEVectorFileWriter extend
// private boolean printModels;
private boolean printVectors;
private boolean printTrees;
- private boolean anaphora;
+// private boolean anaphora;
private boolean useFrags = true; // make a parameter once development is done...
+ public static final String PARAM_OUTPUT_DIR = "outputDir";
+ public static final String PARAM_GOLD_DIR = "goldStandardDir";
+ public static final String PARAM_VECTORS = "writeVectors";
+ public static final String PARAM_TREES = "writeTrees";
+// public static final String PARAM_ANAPH = "anaphora";
+ public static final String PARAM_FRAGS = "treeFrags";
+ public static final String PARAM_STOPS = "stopWords";
+
@Override
- public void initialize() throws ResourceInitializationException{
- outputDir = (String) getConfigParameterValue("outputDir");
- goldStandardDir = (String) getConfigParameterValue("goldStandardDir");
+ public void initialize(UimaContext aContext){
+ outputDir = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_DIR);
+ goldStandardDir = (String) aContext.getConfigParameterValue(PARAM_GOLD_DIR);
// printModels = (Boolean) getConfigParameterValue("writeModels");
- printVectors = (Boolean) getConfigParameterValue("writeVectors");
- printTrees = (Boolean) getConfigParameterValue("writeTrees");
+ printVectors = (Boolean) aContext.getConfigParameterValue(PARAM_VECTORS);
+ printTrees = (Boolean) aContext.getConfigParameterValue(PARAM_TREES);
// upSample = (Boolean) getConfigParameterValue("upSample");
- anaphora = (Boolean) getConfigParameterValue("anaphora");
+// anaphora = (Boolean) aContext.getConfigParameterValue(PARAM_ANAPH);
try{
// need to initialize parameters to default values (except where noted)
@@ -147,12 +142,12 @@ public class ODIEVectorFileWriter extend
proDir.mkdirs();
File demDir = new File(outputDir + "/" + CorefConsts.DEM + "/vectors/");
demDir.mkdirs();
- if(printVectors){
- if(anaphora) anaphOut = new PrintWriter(outputDir + "/anaphor.trainingvectors.libsvm");
+// if(printVectors){
+// if(anaphora) anaphOut = new PrintWriter(outputDir + "/anaphor.trainingvectors.libsvm");
// neOut = new PrintWriter(outputDir + "/" + CorefConsts.NE + "/training.libsvm");
// demOut = new PrintWriter(outputDir + "/" + CorefConsts.DEM + "/training.libsvm");
// pronOut = new PrintWriter(outputDir + "/" + CorefConsts.PRON + "/training.libsvm");
- }
+// }
if(printTrees){
neTreeOut = new PrintWriter(outputDir + "/" + CorefConsts.NE + "/trees.txt");
demTreeOut = new PrintWriter(outputDir + "/" + CorefConsts.DEM + "/trees.txt");
@@ -163,8 +158,9 @@ public class ODIEVectorFileWriter extend
// pathTreeOut = new PrintWriter(outputDir + "/" + CorefConsts.NE + "/matrix.out");
// }
stopwords = new HashSet<String>();
- FileResource r = (FileResource) super.getUimaContext().getResourceObject("stopWords");
- BufferedReader br = new BufferedReader(new FileReader(r.getFile()));
+// FileResource r = (FileResource) aContext.getResourceObject("stopWords");
+ File stopFile = FileLocator.locateFile(((String)aContext.getConfigParameterValue(PARAM_STOPS)));
+ BufferedReader br = new BufferedReader(new FileReader(stopFile));
String l;
while ((l = br.readLine())!=null) {
l = l.trim();
@@ -175,11 +171,12 @@ public class ODIEVectorFileWriter extend
else if (i < 0)
stopwords.add(l.trim());
}
- File anaphModFile = FileLocator.locateFile("anaphoricity.mayo.rbf.model");
- svm_model anaphModel = svm.svm_load_model(anaphModFile.getAbsolutePath());
- vecCreator = new SvmVectorCreator(stopwords, anaphModel);
- r = (FileResource) super.getUimaContext().getResourceObject("treeFrags");
- Scanner scanner = new Scanner(r.getFile());
+// File anaphModFile = FileLocator.locateFile("anaphoricity.mayo.rbf.model");
+// svm_model anaphModel = svm.svm_load_model(anaphModFile.getAbsolutePath());
+ vecCreator = new SvmVectorCreator(stopwords);
+// r = (FileResource) aContext.getResourceObject("treeFrags");
+ File fragFile = FileLocator.locateFile(((String)aContext.getConfigParameterValue(PARAM_FRAGS)));
+ Scanner scanner = new Scanner(fragFile);
if(useFrags){
treeFrags = new ArrayList<String>();
while(scanner.hasNextLine()){
@@ -191,22 +188,21 @@ public class ODIEVectorFileWriter extend
initialized = true;
}catch(Exception e){
System.err.println("Error initializing file writers.");
- throw new ResourceInitializationException();
}
}
@Override
- public void processCas(CAS arg0) throws ResourceProcessException {
+ public void process(JCas jcas) {
// System.err.println("processCas-ing");
if(!initialized) return;
- JCas jcas;
- try {
- jcas = arg0.getCurrentView().getJCas();
- } catch (CASException e) {
- e.printStackTrace();
- System.err.println("No processing done in ODIEVectoFileWriter!");
- return;
- }
+// JCas jcas;
+// try {
+// jcas = arg0.getCurrentView().getJCas();
+// } catch (CASException e) {
+// e.printStackTrace();
+// System.err.println("No processing done in ODIEVectoFileWriter!");
+// return;
+// }
String docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
docId = docId.substring(docId.lastIndexOf('/')+1, docId.length());
@@ -214,7 +210,7 @@ public class ODIEVectorFileWriter extend
// Hashtable<Integer, Integer> goldId2AlignId = new Hashtable<Integer, Integer>();
// Hashtable<Integer, Integer> alignId2GoldId = new Hashtable<Integer, Integer>();
if (docId==null) docId = "141471681_1";
- System.out.print("creating vectors for "+docId);
+ System.out.println("creating vectors for "+docId);
// Vector<Span> goldSpans = loadGoldStandard(docId, goldSpan2id);
int numPos = 0;
@@ -277,13 +273,13 @@ public class ODIEVectorFileWriter extend
NonEmptyFSList node = (NonEmptyFSList) pairList;
BooleanLabeledFS labeledProb = (BooleanLabeledFS) node.getHead();
int label = labeledProb.getLabel() ? 1 : 0;
- if(anaphora){
- if(label == 1) posAnaphInst++;
- else negAnaphInst++;
- anaphLabels.add(label);
- svm_node[] nodes = vecCreator.createAnaphoricityVector(anaphor, jcas);
- anaphNodes.add(nodes);
- }
+// if(anaphora){
+// if(label == 1) posAnaphInst++;
+// else negAnaphInst++;
+// anaphLabels.add(label);
+// svm_node[] nodes = vecCreator.createAnaphoricityVector(anaphor, jcas);
+// anaphNodes.add(nodes);
+// }
Markable antecedent = (Markable) labeledProb.getFeature();
label = (labeler.isGoldPair(anaphor, antecedent) ? 1 : 0);
if(label == 1){
@@ -361,7 +357,11 @@ public class ODIEVectorFileWriter extend
writer.println(" |ET|");
}
pairList = node.getTail();
- if(label == 1) break;
+ // NOTE: If the break below is in place, then we will only output negative examples backwards until we reach
+ // the actual coreferent entity. This may have the effect of suggesting that markables further away
+ // are _more_ likely to be coreferent, which is an assumption that probably does not hold up in the
+ // test set configuration. Try commenting the break out to see if it makes the feature more useful.
+// if(label == 1) break;
}
}
if(printVectors){
@@ -378,38 +378,39 @@ public class ODIEVectorFileWriter extend
return Integer.parseInt(nodeStr.substring(0,1));
}
+
@Override
- public void collectionProcessComplete(ProcessTrace arg0)
- throws ResourceProcessException, IOException {
- super.collectionProcessComplete(arg0);
+ public void batchProcessComplete() throws AnalysisEngineProcessException {
+ super.batchProcessComplete();
+
// System.err.println("collectionProcessComplete!");
if(!initialized) return;
// int numPos = 1;
// int numNeg = 1;
-
- if(anaphora){
- double anaphRatio = (double) posAnaphInst / (double) negAnaphInst;
-// if(anaphRatio > 1.0) numNeg = (int) anaphRatio;
-// else numPos = (int) (1 / anaphRatio);
- for(int i = 0; i < anaphNodes.size(); i++){
- int label = anaphLabels.get(i);
-// int numIters = (label == 1 ? numPos : numNeg);
-// for(int j = 0; j < numIters; j++){
- anaphOut.print(label);
- for(svm_node node : anaphNodes.get(i)){
- anaphOut.print(" ");
- anaphOut.print(node.index);
- anaphOut.print(":");
- anaphOut.print(node.value);
- }
- anaphOut.println();
-// }
- }
- anaphOut.flush();
- anaphOut.close();
- return;
- }
+//
+// if(anaphora){
+// double anaphRatio = (double) posAnaphInst / (double) negAnaphInst;
+//// if(anaphRatio > 1.0) numNeg = (int) anaphRatio;
+//// else numPos = (int) (1 / anaphRatio);
+// for(int i = 0; i < anaphNodes.size(); i++){
+// int label = anaphLabels.get(i);
+//// int numIters = (label == 1 ? numPos : numNeg);
+//// for(int j = 0; j < numIters; j++){
+// anaphOut.print(label);
+// for(svm_node node : anaphNodes.get(i)){
+// anaphOut.print(" ");
+// anaphOut.print(node.index);
+// anaphOut.print(":");
+// anaphOut.print(node.value);
+// }
+// anaphOut.println();
+//// }
+// }
+// anaphOut.flush();
+// anaphOut.close();
+// return;
+// }
if(printVectors){
neOut.close();
demOut.close();
@@ -433,4 +434,48 @@ public class ODIEVectorFileWriter extend
}
return array;
}
+
+ /**
+ * Command-line entry point: feeds every entry of a directory of XMI files through this annotator.
+ * Exits with status -1 when fewer than three arguments are given or the first is not a directory;
+ * any exception raised while building or running the pipeline is reported on stderr.
+ *
+ * @param args {@code <training directory> <gold-pairs directory> <output directory>}
+ */
+ public static void main(String[] args){
+ if(args.length < 3){
+ System.err.println("Arguments: <training directory> <gold-pairs directory> <output directory>");
+ System.exit(-1);
+ }
+ File trainingDir = new File(args[0]);
+ if(!trainingDir.isDirectory()){
+ System.err.println("Arg1 should be a directory! (full of xmi files)");
+ System.exit(-1);
+ }
+ // Hand every directory entry to the reader by absolute path.
+ File[] entries = trainingDir.listFiles();
+ String[] xmiPaths = new String[entries.length];
+ int next = 0;
+ for(File entry : entries){
+ xmiPaths[next++] = entry.getAbsolutePath();
+ }
+ try {
+ CollectionReader reader = CollectionReaderFactory.createCollectionReader(XMIReader.class,
+ XMIReader.PARAM_FILES,
+ xmiPaths);
+
+ // Vectors on, trees off; stop words and tree fragments come from the bundled model resources.
+ AnalysisEngine writer = AnalysisEngineFactory.createPrimitive(ODIEVectorFileWriter.class,
+ PARAM_VECTORS, true,
+ PARAM_TREES, false,
+ PARAM_STOPS, "org/apache/ctakes/coreference/models/stop.txt",
+ PARAM_FRAGS, "org/apache/ctakes/coreference/models/frags.txt",
+ PARAM_GOLD_DIR, args[1],
+ PARAM_OUTPUT_DIR, args[2]);
+
+ SimplePipeline.runPipeline(reader, writer);
+ }catch(Exception e){
+ System.err.println("Exception thrown!");
+ e.printStackTrace();
+ }
+ }
}
Added: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java?rev=1430073&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java (added)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java Mon Jan 7 22:49:52 2013
@@ -0,0 +1,129 @@
+package org.apache.ctakes.coreference.cc;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.UIMAException;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.XMLSerializer;
+import org.cleartk.util.Options_ImplBase;
+import org.kohsuke.args4j.Option;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.ConfigurationParameterFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class PreprocessAndWriteXmi {
+ /**
+ * Command-line options for {@code main}: the input text directory and the directory
+ * that receives the generated XMI files. Both options are required.
+ */
+ public static class Options extends Options_ImplBase {
+
+ @Option(name = "-t",
+ aliases = "--textRoot",
+ usage = "specify the directory containing the text files (for example: /NLP/Corpus/Relations/mipacq/text/train)",
+ required = true)
+ public String textRoot;
+
+ // TODO - fix to use an xml collection reader instead of the hacky way it's done now...
+ // @Option(name = "-x",
+ // aliases = "--xmlRoot",
+ // usage = "specify the directory containing the knowtator xml files (for example: /NLP/Corpus/Relations/mipacq/xml/train",
+ // required = true)
+ // public File xmlRoot;
+
+ @Option(name = "-o",
+ aliases = "--outputRoot",
+ usage = "specify the directory to write out CAS XMI files",
+ required = true)
+ public File outputRoot;
+ }
+
+ /**
+ * Runs the preprocessing pipeline over a directory of text files and writes the resulting
+ * CASes out as XMI through {@link SerializeDocumentToXMI}.
+ *
+ * @param args command-line arguments, parsed into {@link Options}
+ * ({@code -t}/{@code --textRoot} and {@code -o}/{@code --outputRoot})
+ * @throws IOException declared for the component factories and the pipeline run
+ * @throws UIMAException declared for the component factories and the pipeline run
+ */
+ public static void main(String[] args) throws UIMAException, IOException {
+ Options opts = new Options();
+ opts.parseOptions(args);
+
+ // Preprocessing engine, loaded from its XML descriptor.
+ AnalysisEngine preprocessor = AnalysisEngineFactory.createAnalysisEngineFromPath("desc/analysis_engine/ODIESvmVectorCreator.xml");
+
+ // Reader over the text directory given on the command line.
+ CollectionReader textReader = CollectionReaderFactory.createCollectionReaderFromPath(
+ "../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
+ FilesInDirectoryCollectionReader.PARAM_INPUTDIR,
+ opts.textRoot);
+
+ // Final stage: dump each processed CAS to the output directory.
+ AnalysisEngine xmiWriter = AnalysisEngineFactory.createPrimitive(
+ SerializeDocumentToXMI.class,
+ SerializeDocumentToXMI.PARAM_OUTPUT_DIRECTORY,
+ opts.outputRoot.getPath());
+
+ SimplePipeline.runPipeline(textReader, preprocessor, xmiWriter);
+ }
+
+ /**
+ * Annotator that writes every CAS it processes to {@code <documentID>.xmi} inside the
+ * configured output directory.
+ */
+ public static class SerializeDocumentToXMI extends JCasAnnotator_ImplBase {
+ public static final String PARAM_OUTPUT_DIRECTORY = ConfigurationParameterFactory
+ .createConfigurationParameterName(SerializeDocumentToXMI.class, "outputDirectory");
+
+ @ConfigurationParameter(mandatory = true, description = "Specifies the output directory in which to write xmi files")
+ private File outputDirectory;
+
+ /**
+ * Delegates to the superclass, then creates the output directory if it does not exist yet.
+ * NOTE(review): presumably super.initialize is what populates outputDirectory - confirm.
+ */
+ @Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ if (!this.outputDirectory.exists()) {
+ this.outputDirectory.mkdirs();
+ }
+ }
+
+ /**
+ * Serializes the CAS to an XMI file named after its document ID.
+ *
+ * @param jCas the CAS to serialize
+ * @throws AnalysisEngineProcessException if serialization fails or the output file cannot be
+ * opened, written or closed
+ * @throws IllegalArgumentException if the CAS carries no document ID
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ try {
+ // FIXME - not using this right now, just use default jcas
+// JCas goldView = jCas.getView(RelationExtractorEvaluation.GOLD_VIEW_NAME);
+ JCas goldView = jCas;
+ String documentID = DocumentIDAnnotationUtil.getDocumentID(goldView);
+ if (documentID == null) {
+ throw new IllegalArgumentException("No documentID for CAS:\n" + jCas);
+ }
+ File outFile = new File(this.outputDirectory, documentID + ".xmi");
+ FileOutputStream out = new FileOutputStream(outFile);
+ try {
+ ContentHandler handler = new XMLSerializer(out).getContentHandler();
+ new XmiCasSerializer(jCas.getTypeSystem()).serialize(jCas.getCas(), handler);
+ } finally {
+ // Always release the file handle; one stream is opened per document.
+ out.close();
+ }
+ } catch (CASRuntimeException e) {
+ throw new AnalysisEngineProcessException(e);
+ } catch (SAXException e) {
+ throw new AnalysisEngineProcessException(e);
+ } catch (IOException e) {
+ // Covers FileNotFoundException from opening the file as well as failures on close().
+ throw new AnalysisEngineProcessException(e);
+ }
+ }
+
+ }
+
+}
+
Propchange: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/PreprocessAndWriteXmi.java
------------------------------------------------------------------------------
svn:executable = *
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/FeatureVector.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/FeatureVector.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/FeatureVector.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/FeatureVector.java Mon Jan 7 22:49:52 2013
@@ -39,72 +39,66 @@ public class FeatureVector {
};
final static String[] ne_coref_feats = {
- "SameSection", // {yes, no} 1
- "TokenDistance" , // numeric
+ "TokenDistance" , // numeric 1
"SentenceDistance", // numeric
"ExactMatch" , // {yes,no}
"StartMatch" , // {yes,no}
-// "MidMatch" , // {yes,no}
"EndMatch" , // {yes,no}
-// "StringMatch" , // {yes,no}
"SoonStr" , // {C,I}
"Pronoun1" , // {Y,N}
"Pronoun2" , // {Y,N}
- "Definite2" , // {Y,N} 10
- "Demonstrative2" , // {Y,N}
- //"NumberMatch" , // {C,I,NA}
+ "Definite2" , // {Y,N}
+ "Demonstrative2" , // {Y,N} 10
"NumberMatchC" , // {Y,N}
"NumberMatchI" , // {Y,N}
"NumberMatchNA" , // {Y,N}
- //"WnClass" , // {C,I,NA}
"WnClassC" , // {Y,N}
"WnClassI" , // {Y,N}
"WnClassNA" , // {Y,N}
"Alias" , // {C,I}
"ProStr" , // {C,I}
- "SoonStrNonpro" , // {C,I} 20
- "WordOverlap" , // {C,I}
+ "SoonStrNonpro" , // {C,I}
+ "WordOverlap" , // {C,I} 20
"WordsSubstr" , // {C,I}
- //"BothDefinites" , // {C,I,NA}
"BothDefinitesC" , // {Y,N}
"BothDefinitesI" , // {Y,N}
"BothDefinitesNA" , // {Y,N}
- //"BothEmbedded" , // {C,I,NA}
- "BothEmbeddedC" , // {Y,N}
- "BothEmbeddedI" , // {Y,N}
- "BothEmbeddedNA" , // {Y,N}
- //"BothPronouns" , // {C,I,NA}
"BothPronounsC" , // {Y,N}
- "BothPronounsI" , // {Y,N} 30
+ "BothPronounsI" , // {Y,N}
"BothPronounsNA" , // {Y,N}
"Indefinite" , // {I,C}
"Pronoun" , // {I,C}
- "Definite1" , // {Y,N}
+ "Definite1" , // {Y,N} 30
"ClosestComp" , // {C,I}
"IsDrug" , // {Y,N}
"IsDisorder" , // {Y,N}
"IsFinding" , // {Y,N}
"IsProcedure" , // {Y,N}
- "IsAnatomicalSite" , // {Y,N} 40
+ "IsAnatomicalSite" , // {Y,N}
"NPHead" , // {yes, no}
// "Anaph" , // numeric
// "PermStrDist" , //
"PathLength" , // number of nodes in full path 37
"NPunderVP1" , // NP object?
- "NPunderVP2" , //
+ "NPunderVP2" , // 40
"NPunderS1" , // NP subject?
"NPunderS2" , //
"NPunderPP1" , // PP object?
"NPunderPP2" , //
"NPSubj1" , //
- "NPSubj2" , // 50
- "NPSubjBoth" , //
-// "NegatedBoth" , //
-// "NonNegatedBoth" ,
- //"NPSubjBoth" , //
-// "Cat:Ngrams" , // :n-1
-// "TK" ,
-// "WordsStr" // not used, why?
+ "NPSubj2" , //
+ "NPSubjBoth" , //
+ "WikiSim" ,
+// "EntityWikiSim" ,
+// "SimSum" , // 50
+ "AliasDrug" ,
+ "AliasDisorder" ,
+ "AliasFinding" ,
+ "AliasProcedure" ,
+ "AliasAnatomy" ,
+ "EntityStartMatch",
+ "EntityExactMatch",
+ "EntityEndMatch",
};
final static String[] pron_coref_feats = ne_coref_feats;
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/PairAttributeCalculator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/PairAttributeCalculator.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/PairAttributeCalculator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/PairAttributeCalculator.java Mon Jan 7 22:49:52 2013
@@ -53,17 +53,23 @@ import org.apache.ctakes.coreference.typ
public class PairAttributeCalculator extends AttributeCalculator {
protected Markable m1, m2;
- protected String s1, s2;
+ protected String ms1, ms2; // markable strings
+ protected String es1, es2; // entity strings
+// protected String s1, s2;
protected Annotation a1, a2;
-
+ boolean alias;
+
public PairAttributeCalculator (JCas jcas, Markable m1, Markable m2) {
super(jcas);
this.m1 = m1;
this.m2 = m2;
this.a1 = m1.getContent();
this.a2 = m2.getContent();
- s1 = m1.getCoveredText();
- s2 = m2.getCoveredText();
+ ms1 = m1.getCoveredText();
+ ms2 = m2.getCoveredText();
+ es1 = a1.getCoveredText();
+ es2 = a2.getCoveredText();
+ alias = isAlias();
}
/**
@@ -107,39 +113,51 @@ public class PairAttributeCalculator ext
return AnnotationCounter.countPoint(AnnotationSelector.selectSentence(jcas), m1.getEnd(), m2.getBegin());
}
- public String calcExactMatch () {
- return s1.equalsIgnoreCase(s2) ? "yes" : "no";
+ public boolean calcExactMatch () {
+ return ms1.equalsIgnoreCase(ms2);
}
- public String calcStartMatch () {
- return TextMatch.startMatch(s1, s2) ? "yes" : "no";
+ public boolean calcStartMatch () {
+ return TextMatch.startMatch(ms1, ms2);
}
- public String calcMidMatch () {
- return "no";
+ public boolean calcMidMatch () {
+ return false;
}
- public String calcEndMatch () {
- return TextMatch.endMatch(s1, s2) ? "yes" : "no";
+ public boolean calcEndMatch () {
+ return TextMatch.endMatch(ms1, ms2);
}
- public String calcStringMatch() {
- return ( calcExactMatch().equals("yes") || calcStartMatch().equals("yes") || calcEndMatch().equals("yes") ? "yes" : "no");
+ public boolean calcStringMatch() {
+ return (calcExactMatch() || calcStartMatch() || calcEndMatch());
}
- public String calcSoonStr () {
- String sl1 = s1.toLowerCase();
- String sl2 = s2.toLowerCase();
+ public boolean calcEntityExactMatch() {
+ return es1.equalsIgnoreCase(es2);
+ }
+
+ public boolean calcEntityStartMatch() {
+ return TextMatch.startMatch(es1, es2);
+ }
+
+ public boolean calcEntityEndMatch(){
+ return TextMatch.endMatch(es1, es2);
+ }
+
+ public boolean calcSoonStr () {
+ String sl1 = ms1.toLowerCase();
+ String sl2 = ms2.toLowerCase();
// if (sl1.startsWith("the ")) sl1 = sl1.substring(4);
// if (sl1.startsWith("a ")) sl1 = sl1.substring(2);
// if (sl2.startsWith("the ")) sl2 = sl2.substring(4);
// if (sl2.startsWith("a ")) sl2 = sl2.substring(2);
sl1 = nonDetSubstr(sl1);
sl2 = nonDetSubstr(sl2);
- return sl1.equals(sl2) ? "C" : "I";
+ return sl1.equals(sl2);
}
- private String nonDetSubstr (String s) {
+ private static String nonDetSubstr (String s) {
if(s.startsWith("the ")) return s.substring(4);
if(s.startsWith("a ")) return s.substring(2);
if(s.startsWith("this ")) return s.substring(5);
@@ -147,58 +165,59 @@ public class PairAttributeCalculator ext
return s;
}
- public String calcPronoun1 () {
- return isPronoun(m1) ? "Y" : "N";
+ public boolean calcPronoun1 () {
+ return isPronoun(m1);
}
- public String calcPronoun2 () {
- return isPronoun(m2) ? "Y" : "N";
+ public boolean calcPronoun2 () {
+ return isPronoun(m2);
}
- public String calcDefinite2 () {
- return isDefinite(s2) ? "Y" : "N";
+ public boolean calcDefinite2 () {
+ return isDefinite(ms2);
}
- public String calcDemonstrative2 () {
- return isDemonstrative(s2) ? "Y" : "N";
+ public boolean calcDemonstrative2 () {
+ return isDemonstrative(ms2);
}
- public String calcNumberMatchC () {
+ public boolean calcNumberMatchC () {
String n1 = number(m1);
String n2 = number(m2);
- if (!n1.equals("U") && !n2.equals("U") && n1.equals(n2))
- return "Y";
- else
- return "N";
+ if (!n1.equals("U") && !n2.equals("U") && n1.equals(n2)){
+ return true;
+ }
+ return false;
}
- public String calcNumberMatchI () {
+ public boolean calcNumberMatchI () {
String n1 = number(m1);
String n2 = number(m2);
- if (!n1.equals("U") && !n2.equals("U") && !n1.equals(n2))
- return "Y";
- else
- return "N";
+ if (!n1.equals("U") && !n2.equals("U") && !n1.equals(n2)){
+ return true;
+ }
+ return false;
}
- public String calcNumberMatchNA () {
+ public boolean calcNumberMatchNA () {
String n1 = number(m1);
String n2 = number(m2);
- if (n1.equals("U") || n2.equals("U"))
- return "Y";
- else return "N";
+ if (n1.equals("U") || n2.equals("U")){
+ return true;
+ }
+ return false;
}
- public String calcNumberMatch () {
- String n1 = number(m1);
- String n2 = number(m2);
- if (n1.equals("U") || n2.equals("U"))
- return "NA";
- else if (n1.equals(n2))
- return "C";
- else
- return "I";
- }
+// public String calcNumberMatch () {
+// String n1 = number(m1);
+// String n2 = number(m2);
+// if (n1.equals("U") || n2.equals("U"))
+// return "NA";
+// else if (n1.equals(n2))
+// return "C";
+// else
+// return "I";
+// }
// heuristics
// public String calcAppositive () {
@@ -208,51 +227,57 @@ public class PairAttributeCalculator ext
// else return "no";
// }
- public String calcWnClassC () {
+ public boolean calcWnClassC () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
m2.getContent() instanceof IdentifiedAnnotation) {
- IdentifiedAnnotation ne1 = (IdentifiedAnnotation) m1.getContent();
- IdentifiedAnnotation ne2 = (IdentifiedAnnotation) m2.getContent();
- if (ne1.getTypeID() == ne2.getTypeID())
- return "C";
- else return "N";
- } else
- return "N";
+ IdentifiedAnnotation ne1 = (IdentifiedAnnotation) m1.getContent();
+ IdentifiedAnnotation ne2 = (IdentifiedAnnotation) m2.getContent();
+ if (ne1.getTypeID() == ne2.getTypeID()){
+ return true;
+ }
+ return false;
+ }
+ return false;
}
- public String calcWnClassI () {
+ public boolean calcWnClassI () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
m2.getContent() instanceof IdentifiedAnnotation) {
- IdentifiedAnnotation ne1 = (IdentifiedAnnotation) m1.getContent();
- IdentifiedAnnotation ne2 = (IdentifiedAnnotation) m2.getContent();
- if (ne1.getTypeID() != ne2.getTypeID())
- return "Y";
- else return "N";
- } else
- return "N";
+ IdentifiedAnnotation ne1 = (IdentifiedAnnotation) m1.getContent();
+ IdentifiedAnnotation ne2 = (IdentifiedAnnotation) m2.getContent();
+ if (ne1.getTypeID() != ne2.getTypeID()){
+ return true;
+ }
+ return false;
+ }
+ return false;
}
- public String calcWnClassNA () {
+ public boolean calcWnClassNA () {
if (!(m1.getContent() instanceof IdentifiedAnnotation) ||
- !(m2.getContent() instanceof IdentifiedAnnotation))
- return "Y";
- else
- return "N";
+ !(m2.getContent() instanceof IdentifiedAnnotation)){
+ return true;
+ }
+ return false;
}
- public String calcWnClass () {
+ public boolean calcWnClass () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- m2.getContent() instanceof IdentifiedAnnotation) {
+ m2.getContent() instanceof IdentifiedAnnotation) {
IdentifiedAnnotation ne1 = (IdentifiedAnnotation) m1.getContent();
IdentifiedAnnotation ne2 = (IdentifiedAnnotation) m2.getContent();
- if (ne1.getTypeID() == ne2.getTypeID())
- return "C";
- else return "I";
- } else
- return "NA";
+ if (ne1.getTypeID() == ne2.getTypeID()){
+ return true;
+ }
+ }
+ return false;
}
- public String calcAlias () {
+ public boolean calcAlias () {
+ return alias;
+ }
+
+ public boolean isAlias(){
try{
if (m1.getContent() instanceof IdentifiedAnnotation &&
m2.getContent() instanceof IdentifiedAnnotation) {
@@ -269,22 +294,22 @@ public class PairAttributeCalculator ext
for (int i = 0; i < fsa.size(); ++i)
if (fsa.get(i) instanceof UmlsConcept &&
l.contains(((UmlsConcept)fsa.get(i)).getCui()))
- return "C";
+ return true;
}
}catch(Exception e){
System.err.println("Error here!");
}
- return "I";
+ return false;
}
// PRO_STR in Ng and Cardie
- public String calcProStr () {
+ public boolean calcProStr () {
if (isPronominal(m1) &&
isPronominal(m2) &&
- s1.equalsIgnoreCase(s2))
- return "C";
- else
- return "I";
+ ms1.equalsIgnoreCase(ms2)){
+ return true;
+ }
+ return false;
}
// public String calcPnStr () {
@@ -298,15 +323,15 @@ public class PairAttributeCalculator ext
// }
// WORDS_STR in Ng and Cardie - currently not used
- public String calcWordsStr () {
+ public boolean calcWordsStr () {
if (!isPronominal(m1) && !isPronominal(m2) &&
- s1.equalsIgnoreCase(s2))
- return "C";
- else
- return "I";
+ ms1.equalsIgnoreCase(ms2)){
+ return true;
+ }
+ return false;
}
- private String removeArticleAndDemon (String s) {
+ private static String removeArticleAndDemon(String s){
if (s.toLowerCase().startsWith("a "))
return s.substring(2);
else if (s.toLowerCase().startsWith("an "))
@@ -326,32 +351,35 @@ public class PairAttributeCalculator ext
}
// SOON_STR_NONPRO from Ng and Cardie
- public String calcSoonStrNonpro () {
+ public boolean calcSoonStrNonpro () {
if (!isPronominal(m1) && !isPronominal(m2)) {
- String str1 = removeArticleAndDemon(s1);
- String str2 = removeArticleAndDemon(s2);
+ String str1 = removeArticleAndDemon(ms1);
+ String str2 = removeArticleAndDemon(ms2);
if (str1.toLowerCase().indexOf(str2.toLowerCase()) >= 0 ||
- str2.toLowerCase().indexOf(str1.toLowerCase()) >= 0)
- return "C";
+ str2.toLowerCase().indexOf(str1.toLowerCase()) >= 0){
+ return true;
+ }
}
- return "I";
+ return false;
}
// WORD_OVERLAP from Ng and Cardie 02
- public String calcWordOverlap () {
+ public boolean calcWordOverlap () {
ArrayList<String> t1 = contentWords(m1);
ArrayList<String> t2 = contentWords(m2);
- for (String s : t2)
- if (t1.contains(s))
- return "C";
- return "I";
+ for (String s : t2){
+ if (t1.contains(s)){
+ return true;
+ }
+ }
+ return false;
}
// TODO with syntax
// MODIFIER from Ng and Cardie 02
- public String calcModifier () {
- return "yes";
+ public boolean calcModifier () {
+ return true;
}
// public String calcPnSubstr () {
@@ -360,7 +388,7 @@ public class PairAttributeCalculator ext
// is l1 a proper substring of l2?
// TODO optimize with Stringbuffer instead of concatenation
- private boolean isProperSubstring (ArrayList<String> l1, ArrayList<String> l2) {
+ private static boolean isProperSubstring (ArrayList<String> l1, ArrayList<String> l2) {
String str1 = "";
String str2 = "";
for (String s : l1)
@@ -368,84 +396,84 @@ public class PairAttributeCalculator ext
for (String s: l2)
str2 += " " + s;
// FIXME This should be an AND ?
- if (str1.length()!=str2.length() || str2.indexOf(str1)>=0)
+ if (str1.length()!=str2.length() || str2.indexOf(str1)>=0){
return true;
- else
- return false;
+ }
+ return false;
}
- public String calcWordsSubstr () {
+ public boolean calcWordsSubstr () {
if (!isPronominal(m1) && !isPronominal(m2)) {
ArrayList<String> t1 = contentWords(m1);
ArrayList<String> t2 = contentWords(m2);
- if (isProperSubstring(t1, t2) || isProperSubstring(t2, t1))
- return "C";
+ if (isProperSubstring(t1, t2) || isProperSubstring(t2, t1)){
+ return true;
+ }
}
- return "I";
- }
-
- public String calcBothDefinitesC () {
- return (isDefinite(s1) && isDefinite(s2)) ? "Y" : "N";
- }
-
- public String calcBothDefinitesI () {
- return (!isDefinite(s1) && !isDefinite(s2)) ? "Y" : "N";
+ return false;
}
- public String calcBothDefinitesNA () {
- boolean b1 = isDefinite(s1);
- boolean b2 = isDefinite(s2);
- return (!(b1&&b2) && (b1||b2)) ? "Y" : "N";
+ public boolean calcBothDefinitesC () {
+ return (isDefinite(ms1) && isDefinite(ms2));
}
- public String calcBothDefinites () {
- boolean b1 = isDefinite(s1);
- boolean b2 = isDefinite(s2);
- if (b1 && b2) return "C";
- if (b1 || b2) return "NA";
- return "I";
+ public boolean calcBothDefinitesI () {
+ return (!isDefinite(ms1) && !isDefinite(ms2));
}
- public String calcBothEmbeddedC () {
- return "N"; //TODO: sketch
+ public boolean calcBothDefinitesNA () {
+ boolean b1 = isDefinite(ms1);
+ boolean b2 = isDefinite(ms2);
+ return (!(b1&&b2) && (b1||b2));
}
- public String calcBothEmbeddedI () {
- return "N"; //TODO: sketch
- }
-
- public String calcBothEmbeddedNA () {
- return "N"; //TODO: sketch
- }
+// public String calcBothDefinites () {
+// boolean b1 = isDefinite(ms1);
+// boolean b2 = isDefinite(ms2);
+// if (b1 && b2) return "C";
+// if (b1 || b2) return "NA";
+// return "I";
+// }
- public String calcBothEmbedded () {
- return "NA"; //TODO: sketch
- }
+// public String calcBothEmbeddedC () {
+// return "N"; //TODO: sketch
+// }
+//
+// public String calcBothEmbeddedI () {
+// return "N"; //TODO: sketch
+// }
+//
+// public String calcBothEmbeddedNA () {
+// return "N"; //TODO: sketch
+// }
+//
+// public String calcBothEmbedded () {
+// return "NA"; //TODO: sketch
+// }
- public String calcBothPronounsC () {
+ public boolean calcBothPronounsC () {
boolean b1 = isPronoun(m1);
boolean b2 = isPronoun(m2);
- return (b1 && b2) ? "Y" : "N";
+ return (b1 && b2);
}
- public String calcBothPronounsI () {
+ public boolean calcBothPronounsI () {
boolean b1 = isPronoun(m1);
boolean b2 = isPronoun(m2);
- return (!b1 && !b2) ? "Y" : "N";
+ return (!b1 && !b2);
}
- public String calcBothPronounsNA () {
+ public boolean calcBothPronounsNA () {
boolean b1 = isPronoun(m1);
boolean b2 = isPronoun(m2);
- return (!(b1&&b2) && (b1||b2)) ? "Y" : "N";
+ return (!(b1&&b2) && (b1||b2));
}
- public String calcBothPronouns () {
+ public boolean calcBothPronouns () {
boolean b1 = isPronoun(m1);
boolean b2 = isPronoun(m2);
- if (b1 && b2) return "C";
- if (b1 || b2) return "NA";
- return "I";
+ if (b1 && b2) return true;
+ return false;
}
// public String calcSpan () {
@@ -458,121 +486,115 @@ public class PairAttributeCalculator ext
// }
// }
- public String calcIndefinite () {
- if (s2.toLowerCase().startsWith("a ") ||
- s2.toLowerCase().startsWith("an "))
- return "I";
- else
- return "C";
+ public boolean calcIndefinite () {
+ if (ms2.toLowerCase().startsWith("a ") ||
+ ms2.toLowerCase().startsWith("an ")){
+ return false;
+ }
+ return true;
}
- public String calcPronoun () {
- return (isPronoun(m1) && !isPronoun(m2)) ? "I" : "C";
+ public boolean calcPronoun () {
+ return !(isPronoun(m1) && !isPronoun(m2));
}
// public String calcContainsPn () {
//
// }
- public String calcDefinite1 () {
- return isDefinite(s1)?"Y":"N";
+ public boolean calcDefinite1 () {
+ return isDefinite(ms1);
}
// public String calcProperNoun () {
//
// }
- public String calcIsDrug () {
+ public boolean calcIsDrug () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_DRUG)
- return "Y";
- else
- return "N";
+ ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_DRUG){
+ return true;
+ }
+ return false;
}
- public String calcIsDisorder () {
+ public boolean calcIsDisorder () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_DISORDER)
- return "Y";
- else
- return "N";
+ ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_DISORDER){
+ return true;
+ }
+ return false;
}
- public String calcIsFinding () {
+ public boolean calcIsFinding () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_FINDING)
- return "Y";
- else
- return "N";
+ ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_FINDING){
+ return true;
+ }
+ return false;
}
- public String calcIsProcedure () {
+ public boolean calcIsProcedure () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_PROCEDURE)
- return "Y";
- else
- return "N";
+ ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_PROCEDURE){
+ return true;
+ }
+ return false;
}
- public String calcIsAnatomicalSite () {
+ public boolean calcIsAnatomicalSite () {
if (m1.getContent() instanceof IdentifiedAnnotation &&
- ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_ANATOMICAL_SITE)
- return "Y";
- else
- return "N";
+ ((IdentifiedAnnotation)m1.getContent()).getTypeID() == CONST.NE_TYPE_ID_ANATOMICAL_SITE){
+ return true;
+ }
+ return false;
}
- public double calcNegatedBoth(){
+ public boolean calcNegatedBoth(){
if(a1 instanceof EntityMention && a2 instanceof EntityMention){
if(((EntityMention)a1).getPolarity() == -1 &&
((EntityMention)a2).getPolarity() == -1){
- return 1.0;
- }else{
- return 0.0;
+ return true;
}
- }else{
- return 0.0;
}
+ return false;
}
- public double calcNonNegatedBoth(){
+ public boolean calcNonNegatedBoth(){
if(a1 instanceof EntityMention && a2 instanceof EntityMention){
if(((EntityMention)a1).getPolarity() == 1.0 &&
((EntityMention)a2).getPolarity() == 1.0){
- return 1.0;
- }else{
- return 0.0;
+ return true;
}
- }else{
- return 0.0;
}
+ return false;
}
- public String calcClosestComp () {
- if (calcWnClass().equals("C")) {
+ public boolean calcClosestComp () {
+ if (calcWnClass()) {
ArrayList<Annotation> l = AnnotationSelector.selectNE(jcas);
int m2type = ((IdentifiedAnnotation)m2.getContent()).getTypeID();
for (Annotation a : l) {
if (((IdentifiedAnnotation)a).getTypeID()==m2type &&
a.getBegin()>=m1.getEnd() &&
a.getEnd()<=m2.getBegin())
- return "I";
+ return false;
}
- return "C";
+ return true;
}
- return "I";
+ return false;
}
- public String calcNPHead () {
+ public boolean calcNPHead () {
Annotation a = m1.getContent();
// return (a.getEnd()==m1.getEnd() && a.getBegin()>m1.getBegin()) ? "yes" : "no";
FSIterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(LookupWindowAnnotation.type).iterator();
while (iter.hasNext()) {
LookupWindowAnnotation lwa = (LookupWindowAnnotation) iter.next();
if (lwa.getBegin()<=a.getBegin() && lwa.getEnd()==a.getEnd())
- return "yes";
+ return true;
}
- return "no";
+ return false;
}
@@ -584,4 +606,24 @@ public class PairAttributeCalculator ext
return 0.0;
}
+ public boolean calcAliasDrug (){
+ return (alias && calcIsDrug());
+ }
+
+ public boolean calcAliasDisorder(){
+ return (alias && calcIsDisorder());
+ }
+
+ public boolean calcAliasFinding(){
+ return (alias && calcIsFinding());
+ }
+
+ public boolean calcAliasProcedure(){
+ return (alias && calcIsProcedure());
+ }
+
+ public boolean calcAliasAnatomy(){
+ return (alias && calcIsAnatomicalSite());
+ }
+
}
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SvmVectorCreator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SvmVectorCreator.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SvmVectorCreator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SvmVectorCreator.java Mon Jan 7 22:49:52 2013
@@ -18,39 +18,44 @@
*/
package org.apache.ctakes.coreference.util;
+import java.io.File;
+import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.LinkedList;
import java.util.HashSet;
+import java.util.LinkedList;
import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
-import opennlp.tools.parser.Parse;
-
import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
import org.apache.ctakes.constituency.parser.util.TreeUtils;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-
+import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.coreference.type.Markable;
import org.apache.ctakes.utils.tree.FragmentUtils;
import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.ctakes.utils.wiki.WikiIndex;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.pear.util.FileUtil;
public class SvmVectorCreator {
HashSet<String> stopwords = null;
private svm_model anaph_model = null;
ArrayList<SimpleTree> frags = new ArrayList<SimpleTree>();
+ WikiIndex wiki = null;
+ static final int NUM_WIKI_HITS = 5;
public SvmVectorCreator(HashSet<String> stopwords){
this.stopwords = stopwords;
- }
-
- public SvmVectorCreator(HashSet<String> stopwords, svm_model anaph){
- this.stopwords = stopwords;
- anaph_model = anaph;
+ try{
+ wiki = new WikiIndex(NUM_WIKI_HITS, FileLocator.locateFile("org/apache/ctakes/coreference/models/index_med_5k").getAbsolutePath(), "text");
+ wiki.initialize();
+ }catch(IOException e){
+ e.printStackTrace();
+ wiki = null;
+ }
}
public svm_node[] createAnaphoricityVector(Markable m, JCas aJCas) {
@@ -118,7 +123,7 @@ public class SvmVectorCreator {
public svm_node[] getNodeFeatures(Markable anaphor, Markable antecedent, JCas aJCas, boolean needsAnaph) {
LinkedList<svm_node> nodes = new LinkedList<svm_node>();
String[] feats = FeatureVector.getNECorefFeatures();
- SyntaxAttributeCalculator sac = new SyntaxAttributeCalculator(aJCas, antecedent, anaphor);
+ SyntaxAttributeCalculator sac = new SyntaxAttributeCalculator(aJCas, antecedent, anaphor, wiki);
sac.setStopWordsList(stopwords);
int ind = 0;
for (int i = 0; i < feats.length; i++, ind++) {
@@ -183,7 +188,15 @@ public class SvmVectorCreator {
n.value = (Double) val;
nodes.add(n);
}
+ }else if (val instanceof Boolean) {
+ if((Boolean) val == true){
+ svm_node n = new svm_node();
+ n.index = ind + 1;
+ n.value = 1.0;
+ nodes.add(n);
+ }
}
+
}
} catch (Exception e) { e.printStackTrace(); }
}
Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SyntaxAttributeCalculator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SyntaxAttributeCalculator.java?rev=1430073&r1=1430072&r2=1430073&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SyntaxAttributeCalculator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/SyntaxAttributeCalculator.java Mon Jan 7 22:49:52 2013
@@ -33,6 +33,7 @@ import org.apache.ctakes.core.resource.F
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.utils.wiki.WikiIndex;
import org.apache.ctakes.coreference.type.Markable;
import org.apache.ctakes.coreference.type.DemMarkable;
import org.apache.ctakes.coreference.type.NEMarkable;
@@ -47,6 +48,9 @@ public class SyntaxAttributeCalculator e
ConllDependencyNode depLca=null;
String path = null;
String depPath = null;
+ WikiIndex wiki = null;
+ double sim1=-1.0;
+ double sim2=-1.0;
private static int numNEFeats = 0;
private static int numDemFeats = 0;
private static int numPronFeats = 0;
@@ -77,16 +81,14 @@ public class SyntaxAttributeCalculator e
static int[] selFeats = {0};
static int[] pronSelFeats = {0};
- static{
- // TODO initialize feature types...
- // read in feature files for each classifier type
- // TODO don't hard code these file names or at least not the assumption they are not in a subdir of what's on the path
- featSet = loadFeatures(selFeats, "ngramids.mayo.txt");
- pronFeatSet = loadFeatures(pronSelFeats, "pronngramids.mayo.txt");
- numNEFeats = selFeats.length;
- numDemFeats = 0;
- numPronFeats = pronSelFeats.length;
- }
+// static{
+// // read in feature files for each classifier type
+// featSet = loadFeatures(selFeats, "ngramids.mayo.txt");
+// pronFeatSet = loadFeatures(pronSelFeats, "pronngramids.mayo.txt");
+// numNEFeats = selFeats.length;
+// numDemFeats = 0;
+// numPronFeats = pronSelFeats.length;
+// }
static ArrayList<String> loadFeatures(int[] featInds, String filename){
ArrayList<String> feats = new ArrayList<String>();
@@ -113,7 +115,11 @@ public class SyntaxAttributeCalculator e
return feats;
}
- public SyntaxAttributeCalculator(JCas jcas, Markable m1, Markable m2) {
+ public SyntaxAttributeCalculator(JCas jcas, Markable m1, Markable m2){
+ this(jcas,m1,m2,null);
+ }
+
+ public SyntaxAttributeCalculator(JCas jcas, Markable m1, Markable m2, WikiIndex wiki) {
super(jcas,m1,m2);
n1 = MarkableTreeUtils.markableNode(jcas, m1.getBegin(), m1.getEnd());
n2 = MarkableTreeUtils.markableNode(jcas, m2.getBegin(), m2.getEnd());
@@ -121,13 +127,13 @@ public class SyntaxAttributeCalculator e
while(true){
if(n1 == null || lca == null || lca.getBegin() <= n1.getBegin()){
break;
- }else{
- lca = lca.getParent();
}
+ lca = lca.getParent();
}
ngrams = new HashMap<String,Integer>();
calcFullPath();
-
+ this.wiki = wiki;
+ if(this.wiki != null) initWikiSim();
// c1 = MarkableDepUtils.markableNode(jcas, m1.getBegin(), m1.getEnd(), n1);
// c2 = MarkableDepUtils.markableNode(jcas, m2.getBegin(), m2.getEnd(), n2);
// depLca = getDepLCA(c1,c2);
@@ -138,12 +144,11 @@ public class SyntaxAttributeCalculator e
public static int getNumDemFeats(){ return numDemFeats; }
public static int getNumPronFeats(){ return numPronFeats; }
- private String calcNPunderPP(TreebankNode n){
+ private static String calcNPunderPP(TreebankNode n){
if(n != null && n.getParent() != null && n.getParent().getNodeType().equals("PP")){
return "Y";
- }else{
- return "N";
}
+ return "N";
}
public String calcNPunderPP1(){
@@ -154,12 +159,11 @@ public class SyntaxAttributeCalculator e
return calcNPunderPP(n2);
}
- private String calcNPunderS(TreebankNode n){
+ private static String calcNPunderS(TreebankNode n){
if(n != null && n.getParent() != null && n.getParent().getNodeType().equals("S")){
return "Y";
- }else{
- return "N";
}
+ return "N";
}
public String calcNPunderS1(){
@@ -170,12 +174,11 @@ public class SyntaxAttributeCalculator e
return calcNPunderS(n2);
}
- private String calcNPunderVP(TreebankNode n){
+ private static String calcNPunderVP(TreebankNode n){
if(n != null && n.getParent() != null && n.getParent().getNodeType().equals("VP")){
return "Y";
- }else{
- return "N";
}
+ return "N";
}
public String calcNPunderVP1(){
@@ -186,31 +189,71 @@ public class SyntaxAttributeCalculator e
return calcNPunderVP(n2);
}
- public String calcNPSubj(TreebankNode n){
- if(n == null) return "N";
+ public boolean calcNPSubj(TreebankNode n){
+ if(n == null) return false;
if(n.getNodeType().equals("NP")){
StringArray tags = n.getNodeTags();
- if(tags.size() > 0){
+ if(tags != null && tags.size() > 0){
for(int i = 0; i < tags.size(); i++){
if(tags.get(i).equals("SBJ")){
- return "Y";
+ return true;
}
}
}
}
- return "N";
+ return false;
}
- public String calcNPSubj1(){
+ public boolean calcNPSubj1(){
return calcNPSubj(n1);
}
- public String calcNPSubj2(){
+ public boolean calcNPSubj2(){
return calcNPSubj(n2);
}
- public String calcNPSubjBoth(){
- return ((calcNPSubj1().equals("Y") && calcNPSubj2().equals("Y")) ? "Y" : "N");
+ public boolean calcNPSubjBoth(){
+ return (calcNPSubj1() && calcNPSubj2());
+ }
+
+ public void initWikiSim(){
+ if(wiki == null) sim1 = 0.0;
+ else{
+ try{
+ sim1 = wiki.getCosineSimilarity(ms1, ms2);
+ sim2 = wiki.getCosineSimilarity(es1, es2);
+ }catch(Exception e){
+ sim1 = 0.0;
+ sim2 = 0.0;
+ }
+ }
+ }
+
+ public void initEntityWikiSim(){
+ if(wiki == null) sim2 = 0.0;
+ else{
+ try{
+ sim2 = wiki.getCosineSimilarity(es1, es2);
+ }catch(Exception e){
+ sim2 = 0.0;
+ }
+ }
+ }
+
+ public double calcWikiSim(){
+ if(sim1 < 0.0) initWikiSim();
+ return sim1;
+ }
+
+ public double calcEntityWikiSim(){
+ if(sim2 < 0.0) initEntityWikiSim();
+ return sim2;
+ }
+
+ public double calcSimSum(){
+ if(sim1 < 0.0) initWikiSim();
+ if(sim2 < 0.0) initEntityWikiSim();
+ return (sim1+sim2)/2.0;
}
public int numNgrams(Markable m) throws UnexpectedException{
@@ -288,12 +331,11 @@ public class SyntaxAttributeCalculator e
}
public int getPathLength(){
- String path = calcFullPath();
String[] nodes = path.split("[<>]");
- return nodes.length;
+ return nodes.length;
}
- private ConllDependencyNode getDepLCA(ConllDependencyNode c1, ConllDependencyNode c2) {
+ private static ConllDependencyNode getDepLCA(ConllDependencyNode c1, ConllDependencyNode c2) {
HashSet<Annotation> ancestors = new HashSet<Annotation>();
ConllDependencyNode temp = null;
temp = c2.getHead();
@@ -361,7 +403,7 @@ public class SyntaxAttributeCalculator e
return depPath;
}
- private void initNGrams(HashMap<String,Integer> ngrams, String path, int n) {
+ private static void initNGrams(HashMap<String,Integer> ngrams, String path, int n) {
// Find the collection of trigrams in this string and add them to the hash map.
// start by finding the endpoint of the first trigram, then iteratively move the endpoint forward one unit
// while moving a beginning point forward one gram as well.
Added: incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdt
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdt?rev=1430073&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdt
------------------------------------------------------------------------------
svn:executable = *
Propchange: incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdx
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/resources/org/apache/ctakes/coreference/models/index_med_5k/_3.fdx?rev=1430073&view=auto
==============================================================================
Binary file - no diff available.