You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2015/11/02 17:59:52 UTC
svn commit: r1712083 [3/3] - in /ctakes/sandbox/ctakes-clinical-deid: ./
GATE/ GATE/pipeline/ GATE/plugins/ GATE/plugins/ANNIE/
GATE/plugins/ANNIE/.annie-defaults-metadata/ GATE/plugins/ANNIE/resources/
GATE/plugins/ANNIE/resources/gazetteer/ GATE/plug...
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.
Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,287 @@
+package co.dehghan.cdeid.controller;
+
+import gate.creole.ResourceInstantiationException;
+//import validationtools.Evaluation;
+//import validationtools.GenerateGateDocument;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.TrueFileFilter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.util.PDFTextStripper;
+
+import co.dehghan.cdeid.io.Output;
+import co.dehghan.cdeid.pipeline.firstpass.PassOne;
+import co.dehghan.cdeid.pipeline.firstpass.ner.EmailNER;
+import co.dehghan.cdeid.pipeline.firstpass.ner.UrlNER;
+import co.dehghan.cdeid.pipeline.secondpass.Input;
+import co.dehghan.cdeid.pipeline.secondpass.PassTwo;
+import co.dehghan.cdeid.pipeline.other.PostProcess;
+import co.dehghan.cdeid.pipeline.other.PreProcess;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehgha
+ *
+ * Controller.
+ *
+ * Import lib/validationtools.jar if you intend to validate or continue to develop cDeid; see TESTING commented code.
+ * Obtain the data from http://www.i2b2.org/NLP.
+ */
+public class Controller {
+
+ public Controller(){}
+
+ /*
+ * TESTING
+
+ public static void main(String[] args) throws ResourceInstantiationException, IOException{
+ Controller c = new Controller();
+ c.run(new File("i2b2-2014/TrackI/testing-PHI-Gold-fixed/"), new File("/home/dehghana/Data/i2b2-2014/op/"), "gatexml");
+ }
+ */
+
+ /**
+ * Run NLP components:
+ * 1. Pre-processing
+ * 2. PassOne
+ * 3. PassTwo
+ * 4. Post-processing
+ *
+ * @param r_corpus input directory
+ * @param w_corpus output directory
+ * @throws ResourceInstantiationException
+ * @throws IOException
+ */
+ public void run(File input_dir, File output_dir, String outputFormat) throws ResourceInstantiationException, IOException{
+
+ PreProcess p = new PreProcess();
+
+ gate.Document gateDoc = null;
+
+ Collection<File> fileList = FileUtils.listFiles(new File(input_dir.getAbsolutePath()), TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
+
+ //to store processed documents @runtime
+ ArrayList<gate.Document> gateDocList = new ArrayList<gate.Document>();
+
+ /*
+ * TESTING:
+ ArrayList<gate.Document> gateList = GenerateGateDocument.getGateDocument(input_dir.getAbsolutePath());
+ */
+
+ //for each input document:
+ System.out.println("Processing document(s):");
+ for(File f: fileList){
+ System.out.print("\r"+f.getName());
+
+ /* TESTING
+ for(gate.Document gd: gateList){
+ gateDoc = gd;
+ */
+
+ gateDoc = Document.getGateDocument(f);
+ if(gateDoc == null)
+ continue;
+
+ //NLP pre-process i.e., Tokeniser and Sentence splitter
+ gateDoc.setName(f.getName()); //set name; necessary for PassTwo
+ p.preProcessingPipeline(gateDoc);
+
+ gateDocList.add(gateDoc);
+ }
+ System.out.print("\r");
+ /*
+ * run pass one
+ */
+ runFirstPassPipeline(gateDocList);
+
+ /*
+ * run pass two
+ */
+ runSecondPassPipeline(gateDocList);
+
+ /*
+ * run post processing pipeline
+ */
+ runPostProcessingPipeline(gateDocList);
+
+ /*
+ * output
+ */
+ saveNotes(output_dir, gateDocList, outputFormat);
+
+ /*
+ * evaluation
+ */
+ //runEvaluation(gateDocList);
+
+ }
+
+ /**
+ * Run first pass pipeline. Prerequisite: PreProcess.java processed gate.Documents.
+ *
+ * @param gateDocList list of gate.Documents
+ */
+ private static void runFirstPassPipeline(ArrayList<gate.Document> gateDocList){
+ PassOne p1 = new PassOne();
+ for(gate.Document g: gateDocList){
+ p1.firstPassPipeline(g);
+ UrlNER.run(g);
+ EmailNER.run(g);
+ }
+ }
+
+ /**
+ * Run second pass pipeline. Prerequisite: PassOne.java processed gate.Documents.
+ * (OBS!) Hard-coded entity types: Patient, Doctor, Idnum, Medicalrecord and Zip.
+ *
+ * @param gateDocList list of gate.Documents
+ */
+ private static void runSecondPassPipeline(ArrayList<gate.Document> gateDocList){
+ List<Input> inputList = new ArrayList<Input>();
+ String filter_pt = "(?i)take|po|i|fair|his|her|the|l|trace|seen|all|please|visit|pulses|ptt|physician|arthritis|depression|short|long|sick",
+ filter_dr = "(?i)take|po|i|fair|md";
+
+ inputList.add(new Input("NAME", "PATIENT", filter_pt));//NAME:PATIENT
+ inputList.add(new Input("NAME", "DOCTOR", filter_dr)); //NAME:DOCTOR
+ inputList.add(new Input("ID", "IDNUM", "")); //ID:IDNUM
+ inputList.add(new Input("ID", "MEDICALRECORD", "")); //ID:MEDICALRECORD
+ inputList.add(new Input("LOCATION", "ZIP", "")); //LOCATION:ZIP
+
+ PassTwo p2 = new PassTwo(gateDocList, inputList);
+ p2.secondPassPipeline();
+ }
+
+ /**
+ * Run post processing pipeline. Prerequisite: PassOne.java and PassTwo.java processed gate.Documents.
+ *
+ * @param gateDocList
+ */
+ private static void runPostProcessingPipeline(ArrayList<gate.Document> gateDocList){
+ PostProcess pp = new PostProcess(); //post processing
+ for(gate.Document g: gateDocList){
+ pp.postProcessingPipeline(g);
+ }
+ }
+
+ private static void saveNotes(File output_dir, ArrayList<gate.Document> gateDocList, String outputFormat) throws IOException{
+ for(gate.Document gateDoc: gateDocList){
+ if(outputFormat.equals("gatexml"))
+ FileUtils.writeStringToFile(new File(output_dir.getAbsolutePath()+"/"+gateDoc.getName().substring(0, gateDoc.getName().length()-3)+"xml"), Output.getGateXml(gateDoc), "UTF-8");
+ else
+ FileUtils.writeStringToFile(new File(output_dir.getAbsolutePath()+"/"+gateDoc.getName().substring(0, gateDoc.getName().length()-3)+"xml"), Output.getXml(gateDoc, "final_predictions"), "UTF-8");
+ }
+ }
+
+ /**
+ * Run evaluation.
+ *
+ * @param gateDocList
+
+ private static void runEvaluation(ArrayList<gate.Document> gateDocList){
+ Evaluation e1 = new Evaluation(),
+ e2 = new Evaluation(),
+ e3 = new Evaluation(),
+ e4 = new Evaluation(),
+ e5 = new Evaluation(),
+ e6 = new Evaluation(),
+ e7 = new Evaluation(),
+ e8 = new Evaluation(),
+ e9 = new Evaluation(),
+ e10 = new Evaluation(),
+ e11= new Evaluation(),
+ e12 = new Evaluation(),
+ e13 = new Evaluation(),
+ e14 = new Evaluation();
+
+ for(gate.Document g: gateDocList){
+ e1.procDoc(g, "PATIENT", "gold_set", "PATIENT", "final_predictions", false);
+ e2.procDoc(g, "DOCTOR", "gold_set", "DOCTOR", "final_predictions", false);
+ e3.procDoc(g, "IDNUM", "gold_set", "IDNUM", "final_predictions", false);
+ e4.procDoc(g, "MEDICALRECORD", "gold_set", "MEDICALRECORD", "final_predictions", false);
+ e5.procDoc(g, "AGE", "gold_set", "AGE", "final_predictions", false);
+ e6.procDoc(g, "ZIP", "gold_set", "ZIP", "final_predictions", false);
+ e7.procDoc(g, "PHONE", "gold_set", "PHONE", "final_predictions", false);
+ e8.procDoc(g, "FAX", "gold_set", "FAX", "final_predictions", false);
+ e9.procDoc(g, "EMAIL", "gold_set", "EMAIL", "final_predictions", false);
+ e10.procDoc(g, "URL", "gold_set", "URL", "final_predictions", false);
+ e11.procDoc(g, "STREET", "gold_set", "STREET", "final_predictions", false);
+ e12.procDoc(g, "COUNTRY", "gold_set", "COUNTRY", "final_predictions", false);
+ e13.procDoc(g, "STATE", "gold_set", "STATE", "final_predictions", false);
+ e14.procDoc(g, "USERNAME", "gold_set", "USERNAME", "final_predictions", false);
+
+ }
+ System.out.println("PATIENT:");
+ e1.printEval();
+ System.out.println("DOCTOR:");
+ e2.printEval();
+ System.out.println("IDNUM:");
+ e3.printEval();
+ System.out.println("MEDICALRECORD:");
+ e4.printEval();
+ System.out.println("AGE:");
+ e5.printEval();
+ System.out.println("ZIP:");
+ e6.printEval();
+ System.out.println("PHONE:");
+ e7.printEval();
+ System.out.println("FAX:");
+ e8.printEval();
+ System.out.println("EMAIL:");
+ e9.printEval();
+ System.out.println("URL:");
+ e10.printEval();
+ System.out.println("STREET:");
+ e11.printEval();
+ System.out.println("COUNTRY:");
+ e12.printEval();
+ System.out.println("STATE:");
+ e13.printEval();
+ System.out.println("USERNAME:");
+ e14.printEval();
+ }
+ */
+}
+
+class Document{
+    /**
+     * Read a file into a new gate.Document. The format is chosen from the file name:
+     * ".txt" is read as UTF-8 plain text, ".pdf" is run through PDFTextStripper.
+     * Any other extension is reported on System.err and yields null.
+     *
+     * I/O and GATE instantiation errors are not propagated: their stack trace is printed
+     * and null is returned, so callers must check the result.
+     *
+     * @param f file
+     * @return gate.Document, or null if the file could not be read or has an unsupported extension
+     *
+     * TODO: expand input formats ...
+     */
+    public static gate.Document getGateDocument(File f) {
+
+        gate.Document gateDoc = null;
+        try {
+            if(f.getName().endsWith(".txt")){
+                gateDoc = gate.Factory.newDocument(FileUtils.readFileToString(f, "UTF-8"));
+            }else if(f.getName().endsWith(".pdf")){
+                // the stripper is only needed for PDFs, so it is created here rather than for every file
+                PDFTextStripper stripper = new PDFTextStripper();
+                PDDocument pdf = PDDocument.load(f);
+                try {
+                    gateDoc = gate.Factory.newDocument(stripper.getText(pdf));
+                } finally {
+                    // release the resources held by the loaded PDF even if text extraction fails
+                    pdf.close();
+                }
+            }else{
+                System.err.println("cDeid can only process plain text (.txt) and searchable portable document format (.pdf) documents.\n"
+                        + "Amend the filename extension accordingly.");
+                System.err.println("Document err: " + f.getName());
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        } catch (ResourceInstantiationException e) {
+            e.printStackTrace();
+        }
+
+        return gateDoc;
+    }
+
+}
\ No newline at end of file
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,62 @@
+package co.dehghan.cdeid.io;
+
+import gate.Annotation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Generate Output.
+ */
+public class Output {
+
+ /**
+ * Gate XML
+ *
+ * @param gateDoc gate.Document
+ * @param annotationSet annotation set where all annotations are stored
+ * @return XML in String format
+ */
+ public static String getXml(gate.Document gateDoc, String annotationSet)
+ {
+ String[] labels = { "PATIENT", "DOCTOR", "USERNAME",
+ "ZIP", "STATE", "STREET", "COUNTRY",
+ "PHONE", "FAX", "URL", "EMAIL",
+ "AGE", "MEDICALRECORD", "IDNUM"};
+
+ String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
+ "<cDeid>\n" +
+ "<TEXT><![CDATA[" + gateDoc.getContent().toString() + "]]></TEXT>\n" +
+ "<TAGS>\n";
+
+ int i=1;
+ for(String label: labels)
+ {
+ List<Annotation> ann = new ArrayList<Annotation>(gateDoc.getAnnotations(annotationSet).get(label));
+ Collections.sort(ann, gate.Utils.OFFSET_COMPARATOR);
+
+ for(Annotation a: ann)
+ {
+ xml += "<"+a.getFeatures().get("CATEGORY").toString()+" id=\"P"+i+"\" "+"start=\"" + a.getStartNode().getOffset() + "\" " + "end=\""+ a.getEndNode().getOffset() +"\" " + "text=\""+ gate.Utils.stringFor(gateDoc, a) +"\" " + "TYPE=\""+a.getType().toString()+"\" />\n";
+ i++;
+ }
+ }
+ xml += "</TAGS>\n" +
+ "</cDeid >";
+
+ return xml;
+ }
+
+ /**
+ * Get GATE XML.
+ *
+ * @param gateDoc gate.Document
+ * @return gate.Document XML
+ */
+ public static String getGateXml(gate.Document gateDoc){
+ return gateDoc.toXml();
+ }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,61 @@
+package co.dehghan.cdeid.pipeline.firstpass;
+
+import java.io.File;
+import java.io.IOException;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.util.GateException;
+import gate.util.persistence.PersistenceManager;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * First-pass pipeline wrapper. According to the original notes, annotations are saved
+ * to annotation set: passOne
+ *
+ * See GATE/pipeline/passOne.xgapp
+ */
+public class PassOne {
+
+    /** Location of the saved GATE application, relative to the working directory. */
+    private static final String PIPELINE_FILE = "GATE/pipeline/passOne.xgapp";
+
+    private static CorpusController passOne;
+    private static Corpus corpus;
+
+    /**
+     * Loads the pipeline. The controller and corpus are static, so every new instance
+     * reloads and replaces them.
+     */
+    public PassOne(){
+        init();
+    }
+
+    /**
+     * Load the saved application and attach a fresh corpus to it.
+     * Failures are only printed; the static fields may then be left unset.
+     */
+    private static void init(){
+        try {
+            passOne = (CorpusController) PersistenceManager.loadObjectFromFile(new File(PIPELINE_FILE));
+            corpus = Factory.newCorpus("c1");
+            passOne.setCorpus(corpus);
+        } catch (GateException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * FirstPass pipeline: runs the loaded application over a single document.
+     * A GateException is reported on System.err and not rethrown.
+     *
+     * @param gateDoc gate.Document
+     */
+    public void firstPassPipeline(gate.Document gateDoc)
+    {
+        try{
+            // the corpus only ever holds the document currently being processed
+            corpus.add(gateDoc);
+            passOne.execute();
+        } catch (GateException e) {
+            System.err.println("Pipeline.firstPassPipeline(...): " + e.getMessage() );
+        }
+        // empty the corpus and call cleanup() on the controller after every document
+        corpus.clear();
+        passOne.cleanup();
+    }
+}
\ No newline at end of file
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,46 @@
+package co.dehghan.cdeid.pipeline.firstpass.ner;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import gate.util.InvalidOffsetException;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Saves annotations to annotation set: passOne
+ *
+ * Email NER for GATE documents.
+ */
+public class EmailNER {
+
+    // NOTE(review): the second character class contains a space, so the local part may
+    // span blanks after a dot - confirm this is intentional.
+    private static final String EMAIL_PATTERN =
+            "[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9- ]+)*@"
+            + "[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.(com|org|edu|gov|mil|co\\.uk))"; //expand
+
+    /** Compiled once; matching ignores case. */
+    private static final Pattern EMAIL = Pattern.compile(EMAIL_PATTERN, Pattern.CASE_INSENSITIVE);
+
+    /**
+     * Annotate every e-mail address found in the document text.
+     *
+     * @param gateDoc gate.Document
+     */
+    public static void run(gate.Document gateDoc)
+    {
+        Matcher emails = EMAIL.matcher(gateDoc.getContent().toString());
+        while(emails.find())
+        {
+            addAnnotation(gateDoc, emails.start(), emails.end());
+        }
+    }
+
+    /**
+     * Add one EMAIL annotation with feature CATEGORY=CONTACT to the "passOne" annotation set.
+     * Offset errors are printed and otherwise ignored.
+     *
+     * @param gateDoc gate.Document
+     * @param start start offset of the match
+     * @param end end offset of the match
+     */
+    private static void addAnnotation(gate.Document gateDoc, int start, int end)
+    {
+        gate.FeatureMap features = gate.Factory.newFeatureMap();
+        features.put("CATEGORY", "CONTACT");
+
+        try {
+            //annotation set: passOne, category: CONTACT, label: EMAIL
+            gateDoc.getAnnotations("passOne").add((long)start, (long)end, "EMAIL", features);
+        } catch (NumberFormatException e) {
+            e.printStackTrace();
+        } catch (InvalidOffsetException e) {
+            e.printStackTrace();
+        }
+    }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,45 @@
+package co.dehghan.cdeid.pipeline.firstpass.ner;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import gate.util.InvalidOffsetException;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Save annotations to annotation set: passOne
+ *
+ * URL NER for GATE documents.
+ */
+public class UrlNER {
+    // NOTE(review): the dots in this expression are not escaped and therefore match any
+    // character, not just a literal '.' - confirm whether that is intended.
+    private static final String URL_PATTERN =
+            "(https?://)?(www.)([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?(/[a-zA-Z0-9]+)?";
+
+    /** Compiled once; matching ignores case. */
+    private static final Pattern URL = Pattern.compile(URL_PATTERN, Pattern.CASE_INSENSITIVE);
+
+    /**
+     * Annotate every URL found in the document text.
+     *
+     * @param gateDoc gate.Document
+     */
+    public static void run(gate.Document gateDoc)
+    {
+        Matcher urls = URL.matcher(gateDoc.getContent().toString());
+        while(urls.find())
+        {
+            addAnnotation(gateDoc, urls.start(), urls.end());
+        }
+    }
+
+    /**
+     * Add one URL annotation with feature CATEGORY=CONTACT to the "passOne" annotation set.
+     * Offset errors are printed and otherwise ignored.
+     *
+     * @param gateDoc gate.Document
+     * @param start start offset of the match
+     * @param end end offset of the match
+     */
+    private static void addAnnotation(gate.Document gateDoc, int start, int end)
+    {
+        gate.FeatureMap features = gate.Factory.newFeatureMap();
+        features.put("CATEGORY", "CONTACT");
+
+        try {
+            //annotation set: passOne, category: CONTACT, label: URL
+            gateDoc.getAnnotations("passOne").add((long)start, (long)end, "URL", features);
+        } catch (NumberFormatException e) {
+            e.printStackTrace();
+        } catch (InvalidOffsetException e) {
+            e.printStackTrace();
+        }
+    }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,60 @@
+package co.dehghan.cdeid.pipeline.other;
+
+import java.io.File;
+import java.io.IOException;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.util.GateException;
+import gate.util.persistence.PersistenceManager;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Post-processing pipeline wrapper. According to the original notes, annotations are saved
+ * to annotation set: final_predictions
+ *
+ * See GATE/pipeline/postProcessing.xgapp
+ */
+public class PostProcess {
+
+    /** Location of the saved GATE application, relative to the working directory. */
+    private static final String PIPELINE_FILE = "GATE/pipeline/postProcessing.xgapp";
+
+    private static CorpusController postProc;
+    private static Corpus corpus;
+
+    /**
+     * Loads the pipeline. The controller and corpus are static, so every new instance
+     * reloads and replaces them.
+     */
+    public PostProcess(){
+        init();
+    }
+
+    /**
+     * Load the saved application and attach a fresh corpus to it.
+     * Failures are only printed; the static fields may then be left unset.
+     */
+    private static void init(){
+        try {
+            postProc = (CorpusController) PersistenceManager.loadObjectFromFile(new File(PIPELINE_FILE));
+            corpus = Factory.newCorpus("c1");
+            postProc.setCorpus(corpus);
+        } catch (GateException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Post processing pipeline: runs the loaded application over a single document.
+     * A GateException is reported on System.err and not rethrown.
+     *
+     * @param gateDoc gate.Document
+     */
+    public void postProcessingPipeline(gate.Document gateDoc)
+    {
+        try{
+            // the corpus only ever holds the document currently being processed
+            corpus.add(gateDoc);
+            postProc.execute();
+        } catch (GateException e) {
+            System.err.println("Pipeline.postProcessingPipeline(...): " + e.getMessage() );
+        }
+        // empty the corpus and call cleanup() on the controller after every document
+        corpus.clear();
+        postProc.cleanup();
+    }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,84 @@
+package co.dehghan.cdeid.pipeline.other;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.Gate;
+import gate.creole.CreoleRegisterImpl;
+import gate.util.GateException;
+import gate.util.Out;
+import gate.util.persistence.PersistenceManager;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Initialises GATE and wraps the pre-processing pipeline.
+ *
+ * See GATE/pipeline/preProcessing.xgapp
+ */
+public class PreProcess {
+
+    /** GATE home and plugins directory, relative to the working directory. */
+    private static final String GATE_DIR = "GATE/";
+    /** Location of the saved GATE application, relative to the working directory. */
+    private static final String PIPELINE_FILE = "GATE/pipeline/preProcessing.xgapp";
+
+    private static CorpusController preProc;
+    private static Corpus corpus;
+
+    /**
+     * Initialises GATE and loads the pipeline. The controller and corpus are static,
+     * so every new instance repeats the initialisation.
+     */
+    public PreProcess()
+    {
+        init();
+    }
+
+    /**
+     * Silence the CREOLE register logger, print the banner, initialise GATE and load the
+     * pre-processing application. A GateException terminates the JVM with exit code 1;
+     * an IOException is only printed, which may leave the static fields unset.
+     */
+    private static void init()
+    {
+        Logger.getLogger(CreoleRegisterImpl.class).setLevel(Level.OFF);
+
+        Out.prln("\n cDeid (US) v0.1, Copyright (C) 2015 Azad Dehghan");
+        Out.prln("\n.Initialising pipeline ...");
+
+        try {
+            // init GATE
+            Gate.setPluginsHome(new File(GATE_DIR));
+            Gate.setGateHome(new File(GATE_DIR));
+            Gate.runInSandbox(true);
+            Gate.init();
+
+            /*
+             * Init pre-processing pipeline:
+             * 1.Tokenizer
+             * 2.Sentence splitter
+             */
+            preProc = (CorpusController) PersistenceManager.loadObjectFromFile(new File(PIPELINE_FILE));
+            corpus = Factory.newCorpus("c1");
+            preProc.setCorpus(corpus);
+
+        } catch (GateException e) {
+            System.err.println("Pipeline.initGate(): " + e.getMessage());
+            System.exit(1);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        Out.prln(".Initialisation completed ...");
+    }
+
+    /**
+     * Pre-processing pipeline: runs the loaded application over a single document.
+     * A GateException is reported on System.err and not rethrown.
+     *
+     * @param gateDoc gate.Document
+     */
+    public void preProcessingPipeline(gate.Document gateDoc)
+    {
+        try{
+            // the corpus only ever holds the document currently being processed
+            corpus.add(gateDoc);
+            preProc.execute();
+        } catch (GateException e) {
+            System.err.println("Pipeline.preProcessingPipeline(...): " + e.getMessage() );
+        }
+        // empty the corpus and call cleanup() on the controller after every document
+        corpus.clear();
+        preProc.cleanup();
+    }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,49 @@
+package co.dehghan.cdeid.pipeline.secondpass;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Input for PassTwo: one entity type to propagate, the category it belongs to and
+ * a filter for terms that must not be propagated.
+ */
+public class Input {
+
+    private String category;
+    private String type;
+    private String filter;
+
+    /**
+     * @param category CATEGORY, e.g. "NAME"
+     * @param type TYPE, e.g. "PATIENT"
+     * @param filter regular expression of words to exclude, alternatives separated by '|';
+     *        may be the empty string
+     */
+    public Input(String category, String type, String filter){
+        this.category = category;
+        this.type = type;
+        this.filter = filter;
+    }
+
+    /** @return the entity TYPE */
+    public String getType(){
+        return this.type;
+    }
+
+    /** @return the entity CATEGORY */
+    public String getCategory(){
+        return this.category;
+    }
+
+    /** @return the word filter */
+    public String getFilter(){
+        return this.filter;
+    }
+
+    /** @param type the entity TYPE */
+    public void setType(String type){
+        this.type = type;
+    }
+
+    /** @param category the entity CATEGORY */
+    public void setCategory(String category){
+        this.category = category;
+    }
+
+    /** @param filter the word filter */
+    public void setFilter(String filter){
+        this.filter = filter;
+    }
+}
\ No newline at end of file
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,247 @@
+package co.dehghan.cdeid.pipeline.secondpass;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Factory;
+import gate.util.InvalidOffsetException;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import co.dehghan.cdeid.pipeline.secondpass.Overlap;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * Takes initial system annotations (first-pass) and compiles a run-time and patient-level dictionary and
+ * finally applies dictionary matching (second-pass) as final annotations.
+ *
+ * Saves annotations to annotation set: passTwo
+ *
+ * PassTwo.java v1.
+ */
+public class PassTwo {
+
+ private static LinkedHashMap<String, String[]> dictionary; //patient-level dictionaries; k: patient-id, v: dictionary
+ private static ArrayList<gate.Document> gateDocList;
+ private static List<Input> inputList;
+
+ /**
+ * @param gateDocList list of gate.Documents.
+ * @param pass2InputList list of inputs for the second pass.
+ */
+ public PassTwo(ArrayList<gate.Document> gateDocList, List<Input> pass2InputList){
+ PassTwo.gateDocList = gateDocList;
+ PassTwo.inputList = pass2InputList;
+ }
+
+ public void secondPassPipeline(){
+ //for each
+ for(Input in: inputList){
+ Extract e = new Extract();
+ for(gate.Document gateDoc: gateDocList){
+ e.extractAnnotatedSpan(gateDoc, in.getType(), "passOne"); //gateDoc, label/type, annotationSet where the annotations are stored.
+ }
+ /* TESTING
+ *///System.out.println(in.getType() + " dictionary size: " +e.getDictionary().size());
+ setPatientLevelDictionaries(e.getDictionary());
+ run(in.getType(), in.getCategory(), in.getFilter());
+ }
+ }
+
+ /**
+ * Run second pass.
+ */
+ private static void run(String label, String category, String filter)
+ {
+ gate.FeatureMap gateMap;
+
+ Pattern p = null;
+ Matcher m;
+ for(gate.Document gd: gateDocList)
+ {
+ //create new annotations in the 'predictions' annotationSet
+ //set annotation feature: CATEGORY
+ gateMap = Factory.newFeatureMap();
+ gateMap.put("CATEGORY", category);
+
+ /* TESTING
+ */ //System.out.println(gd.getName() +"\t"+ gd.getName().substring(0, gd.getName().indexOf("-")));
+ //System.out.println(dictionary.size());
+
+
+ //sanity check document contains initial/pass-one predictions
+ if(dictionary.containsKey(gd.getName().substring(0, gd.getName().indexOf("-")))){
+ //get patient-level dictionary
+ String[] ptLevelDictionary = dictionary.get(gd.getName().substring(0, gd.getName().indexOf("-")));
+
+ //get text
+ String gateDocText = gd.getContent().toString();
+
+ //NB: this may create overlapping annotations
+ for(String s: ptLevelDictionary)
+ {
+ if(!s.matches(filter)){
+ p = Pattern.compile(s, Pattern.CASE_INSENSITIVE);
+ m = p.matcher(gateDocText);
+
+ //for each match create a new annotation
+ while(m.find()){ //or matches?
+ addAnnotation(gd, gateMap, label, m.start(), m.end());
+ }
+ }
+ }
+ //remove overlaps that may be propagated by the the dictionary matching
+ //TODO: proper longest match prediction would make these step unnecessary.
+ Overlap.rmOverlap(gd, label, "passTwo");
+ if(label.equals("PATIENT")||label.equals("DOCTOR"))
+ Overlap.rmOverlap(gd, label, "passTwo");
+ }
+ }
+ }
+
+ /**
+ * Create gate annotation.
+ *
+ * @param gateDoc gate.Document
+ * @param start offset
+ * @param end offset
+ */
+ private static void addAnnotation(gate.Document gateDoc, gate.FeatureMap gateMap, String label, int start, int end)
+ {
+ try {
+ gateDoc.getAnnotations("passTwo").add((long)start, (long)end, label, gateMap);
+ } catch (NumberFormatException e) {
+ e.printStackTrace();
+ } catch (InvalidOffsetException e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ private static void setPatientLevelDictionaries(LinkedHashMap<String, String> listDictionary){
+ PassTwo.dictionary = getAllPatientLevelDictionaries(listDictionary);
+ }
+
+ /**
+ * Pre-process 'raw' dictionaries compiled (with Extract.class) using pass-one annotations.
+ *
+ * 1. convert LinkedHashMap<String, String> to LinkedHashMap<String, String[]>
+ * 2. remove empty dictionary entries; remove newline characters; escape known special symbols: '.'
+ * 3. Prepare entries for java.regex (word-boundary) matching
+ * 4. remove duplicate dictionary entries
+ * @param listDictionary k: patient-id, v: extracted NEs separated by '\n'
+ * @return ready-to-use run-time dictionaries, k: patient-id, v: corresponding dictionary
+ */
+ private static LinkedHashMap<String, String[]> getAllPatientLevelDictionaries(LinkedHashMap<String, String> listDictionary){
+
+ LinkedHashMap<String, String[]> allPtDictionaries = new LinkedHashMap<String, String[]>();
+ Set<String> kSet = listDictionary.keySet();
+ for(String k: kSet){
+ List<String> list = new ArrayList<String>(Arrays.asList(listDictionary.get(k).split("\n")));
+
+ for(int i=0;i<list.size();i++){
+ if(list.get(i).equals("")) //remove empty entries
+ list.remove(i);
+ else{
+ list.set(i, list.get(i).trim().replaceAll("\n", "")); //remove \newline
+ list.set(i, list.get(i).replaceAll("\\.", "\\\\.")); //escape dots due to subseq. java.regex matching
+ list.set(i, "\\b"+list.get(i)+"\\b"); //word matching
+ }
+ }
+ //remove duplicate entries by converting to Set
+ LinkedHashSet<String> tempDic = new LinkedHashSet<String>(list);
+ allPtDictionaries.put(k, tempDic.toArray(new String[0]));
+
+ }
+ return allPtDictionaries;
+ }
+
+}
+
+/**
+ * A class to compile an initial 'raw' dictionary: for every patient id, the text of all
+ * extracted annotations joined with '\n'.
+ */
+class Extract {
+
+    // static, and replaced by every new Extract instance
+    static LinkedHashMap<String, String> ListDictionary;
+
+    public Extract()
+    {
+        ListDictionary = new LinkedHashMap<String, String>();
+    }
+
+    /**
+     * Extract annotations at the patient-level using file names as ID/Key and construct a 'raw' dictionary.
+     * The key is the part of the document name before the first '-' (L100-xx.xml -> L100).
+     *
+     * @param gateDoc gate.Document
+     * @param type entity type label
+     * @param annSet annotation set where firstPass annotations are stored.
+     */
+    public void extractAnnotatedSpan(gate.Document gateDoc, String type, String annSet)
+    {
+        AnnotationSet typedAnnotations = gateDoc.getAnnotations(annSet).get(type);
+        String docName = gateDoc.getName();
+        String patientId = docName.substring(0, docName.indexOf("-"));
+
+        /* TESTING
+         */ //System.out.println( type + "\t fileID:" + patientId + "\t AnnSet.size():" + typedAnnotations.size());
+
+        for(Annotation a: typedAnnotations)
+        {
+            String span = gate.Utils.stringFor(gateDoc, a);
+            String collected = ListDictionary.get(patientId);
+
+            // first entry for this patient (or nothing but empty text so far): no separator
+            if(collected == null || collected.equals(""))
+                ListDictionary.put(patientId, span);
+            else
+                ListDictionary.put(patientId, collected + "\n" + span);
+        }
+    }
+
+    /**
+     * @return A 'raw' dictionary with k: patient-id , v: extracted annotations separated with "\n"
+     */
+    public LinkedHashMap<String, String> getDictionary(){
+        return ListDictionary;
+    }
+}
+
+/**
+ * Remove overlapping annotations of one type from one annotation set. Needed because the
+ * simple dictionary matching of the second pass can annotate overlapping spans.
+ *
+ * Of two overlapping annotations the longer one is kept; on equal length the earlier one
+ * (in sorted order) is kept.
+ *
+ * TODO: handle/remove overlap > 2 annotations
+ */
+class Overlap {
+ /**
+  * Remove overlapping annotations in place.
+  *
+  * @param gateDoc gate.Document whose annotations are pruned
+  * @param annType annotation type to process
+  * @param annSet name of the annotation set to process
+  */
+ public static void rmOverlap(gate.Document gateDoc, String annType, String annSet)
+ {
+ //annType: annotation type to process
+ //work on a sorted copy; removals are mirrored into the document's annotation set
+ ArrayList<Annotation> ann = new ArrayList<Annotation>(gateDoc.getAnnotations(annSet).get(annType));
+ Collections.sort(ann, gate.Utils.OFFSET_COMPARATOR);
+
+ //compare every annotation i with every later annotation j
+ for(int i=0;i<ann.size() && i+1 != ann.size();i++){
+ for(int j=i+1;j<ann.size();j++) //j=next, i previous
+ {
+ //overlap: j starts inside i or j ends inside i; the bounds are inclusive,
+ //so annotations that merely touch (j starts where i ends) count as overlapping
+ if((ann.get(j).getStartNode().getOffset() >= ann.get(i).getStartNode().getOffset() && ann.get(j).getStartNode().getOffset() <= ann.get(i).getEndNode().getOffset())
+ ||
+ (ann.get(j).getEndNode().getOffset() <= ann.get(i).getEndNode().getOffset() && ann.get(j).getEndNode().getOffset() >= ann.get(i).getStartNode().getOffset()))
+ {
+ //i is at least as long as j: drop j and re-test index j, which now holds the next annotation
+ if((ann.get(i).getEndNode().getOffset() - ann.get(i).getStartNode().getOffset()) >= (ann.get(j).getEndNode().getOffset() - ann.get(j).getStartNode().getOffset())){
+ gateDoc.getAnnotations(annSet).remove(ann.remove(j));
+ j--;
+ }
+ //j is longer: drop i. Neither index is adjusted, so index i now refers to the
+ //annotation that followed the removed one and the inner loop simply carries on.
+ //NOTE(review): this can leave some pairs uncompared - presumably why PassTwo
+ //calls rmOverlap a second time for PATIENT and DOCTOR; confirm.
+ else{
+ gateDoc.getAnnotations(annSet).remove(ann.remove(i));
+ }
+ }
+ }
+ }
+ }
+}
Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java Mon Nov 2 16:59:48 2015
@@ -0,0 +1,67 @@
+package co.dehghan.cdeid.ui;
+
+import gate.creole.ResourceInstantiationException;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URISyntaxException;
+
+import co.dehghan.cdeid.controller.Controller;
+
+/**
+ * cDeid Copyright (C) 2015 Azad Dehghan
+ *
+ * User interface.
+ */
+public class Deid {
+
+ public static void main(String[] args) throws URISyntaxException, IOException, ResourceInstantiationException
+ {
+ try {
+ parseCmdLine(args);
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ usage();
+ }
+ }
+
+ /**
+ * print usage information.
+ *
+ * @return
+ */
+ private static String usage()
+ {
+ return "\n****************************************************************************************" +
+ "\n cDeid Copyright (C) 2015 Azad Dehghan, v.0.1 (US)\n\n"
+
+ + "Usage: java -jar Deid.jar [--xml|--gatexml] <input_dir> <output_dir>\n\n" +
+ "*****************************************************************************************\n";
+ }
+
+ private static void parseCmdLine(String[] args) throws IOException, ResourceInstantiationException
+ {
+ Controller controller = new Controller();
+
+ if(args.length < 2 || args.length > 3){
+ System.out.println(usage());
+ }
+ else if(args[0].toLowerCase().contentEquals("--gatexml"))
+ {
+ String r_corpus = args[1];
+ String w_output = args[2];
+ controller.run(new File(r_corpus), new File(w_output), "gatexml");
+ }
+ else if(args[0].toLowerCase().contentEquals("--xml"))
+ {
+ String r_corpus = args[1];
+ String w_output = args[2];
+ controller.run(new File(r_corpus), new File(w_output), "xml");
+ }
+ else if(args[0].toLowerCase().contentEquals("--help")||args[0].toLowerCase().contentEquals("-h"))
+ System.out.println(usage());
+ else
+ System.err.println(usage());
+ }
+}
+