You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by pk...@apache.org on 2016/06/03 09:48:41 UTC
svn commit: r1746700 - in /ctakes/sandbox/ctakes-clinical-deid/src/main:
java/org/apache/ctakes/deid/ resources/org/apache/ctakes/deid/types/
ruta/org/apache/ctakes/deid/ xsd/
Author: pkluegl
Date: Fri Jun 3 09:48:41 2016
New Revision: 1746700
URL: http://svn.apache.org/viewvc?rev=1746700&view=rev
Log:
CTAKES-384
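- fixed reader (CONTACT elements now create Contact annotations instead of Name)
- added ID (new ID type in the type system, ID element in i2b2.xsd, reader support)
- added more rules (ID, Fax, ...)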
Modified:
ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta
ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java Fri Jun 3 09:48:41 2016
@@ -39,8 +39,10 @@ import org.apache.ctakes.deid.i2b2.DeIdi
import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
import org.apache.ctakes.deid.type.Age;
+import org.apache.ctakes.deid.type.Contact;
import org.apache.ctakes.deid.type.Date;
import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.ctakes.deid.type.ID;
import org.apache.ctakes.deid.type.Location;
import org.apache.ctakes.deid.type.Name;
import org.apache.ctakes.deid.type.Profession;
@@ -166,7 +168,7 @@ public class I2B2DeidCollectionReader ex
entity.setComment(element.getComment());
} else if (object instanceof CONTACT) {
CONTACT element = (CONTACT) object;
- entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+ entity = new Contact(jcas, element.getStart().intValue(), element.getEnd().intValue());
entity.setId(element.getId());
entity.setEntityType(element.getTYPE());
entity.setComment(element.getComment());
@@ -176,6 +178,12 @@ public class I2B2DeidCollectionReader ex
entity.setId(element.getId());
entity.setEntityType(element.getTYPE());
entity.setComment(element.getComment());
+ } else if (object instanceof org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID) {
+ org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID element = (org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID) object;
+ entity = new ID(jcas, element.getStart().intValue(), element.getEnd().intValue());
+ entity.setId(element.getId());
+ entity.setEntityType(element.getTYPE());
+ entity.setComment(element.getComment());
}
entity.addToIndexes();
}
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java Fri Jun 3 09:48:41 2016
@@ -48,7 +48,7 @@ public class SimpleDeidEntityComparator
public static final String PARAM_CREATE_RUTA_EVAL_ANNOTATIONS = "createRutaEvalAnnotations";
- private static final boolean PRINT_ANNOTATIONS = true;
+ private static final boolean PRINT_ANNOTATIONS = false;
@ConfigurationParameter(name = PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, mandatory = true, defaultValue = "false")
private Boolean createRutaEvalAnnotations;
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml Fri Jun 3 09:48:41 2016
@@ -11,6 +11,11 @@
<supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
</typeDescription>
<typeDescription>
+ <name>org.apache.ctakes.deid.type.ID</name>
+ <description/>
+ <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+ </typeDescription>
+ <typeDescription>
<name>org.apache.ctakes.deid.type.Age</name>
<description/>
<supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta Fri Jun 3 09:48:41 2016
@@ -8,6 +8,8 @@ RETAINTYPE(WS);
(Num4{-PARTOF(deid.Date),REGEXP("19..|20..")} Dash Num2 Dash Num2){-> deid.Date};
(Num12{-PARTOF(deid.Date)} Slash Num12 Slash Num4{REGEXP("19..|20..")}){-> deid.Date};
(Num12{-PARTOF(deid.Date)} Slash (Num12 Slash)? Num2{REGEXP("[123456789].")}){-> deid.Date};
+RETAINTYPE;
+
+(MonthInd{-PARTOF(deid.Date)} Num4{-PARTOF(deid.Date),REGEXP("19..|20..")}){-> deid.Date};
Num4{-PARTOF(deid.Date),REGEXP("19..|20..")-> deid.Date};
MonthInd{-PARTOF(deid.Date)-> deid.Date};
-RETAINTYPE;
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Fri Jun 3 09:48:41 2016
@@ -16,6 +16,8 @@ TYPESYSTEM org.apache.ctakes.deid.StateR
TYPESYSTEM org.apache.ctakes.deid.CountryRutaTypeSystem;
TYPESYSTEM org.apache.ctakes.deid.FaxRutaTypeSystem;
TYPESYSTEM org.apache.ctakes.deid.PatientRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.IDNumRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.MedicalRecNumRutaTypeSystem;
SCRIPT org.apache.ctakes.deid.Dictionaries;
SCRIPT org.apache.ctakes.deid.Age;
@@ -55,8 +57,8 @@ Age{-> Age.entityType = "AGE"};
Country{-> Location, Location.entityType = "COUNTRY"};
Doctor{-> Name, Name.entityType = "DOCTOR"};
Fax{-> Contact, Contact.entityType = "FAX"};
-//IDNum{-> ID, ID.entityType = "IDNUM"};
-//MedicalRecNum{-> ID, ID.entityType = "MEDICALRECNUM"};
+IDNum{-> ID, ID.entityType = "IDNUM"};
+MedicalRecNum{-> ID, ID.entityType = "MEDICALRECNUM"};
Patient{-> Name, Name.entityType = "PATIENT"};
Phone{-> Contact, Contact.entityType = "PHONE"};
State{-> Location, Location.entityType= "STATE"};
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta Fri Jun 3 09:48:41 2016
@@ -32,19 +32,30 @@ DECLARE Url, Email;
DECLARE MDInd;
"M\\.D\\."-> MDInd;
-DECLARE Num1, Num12, Num2, Num3, Num34, Num4, Num5, Num6, Num7, Num8;
+DECLARE Num1, Num12, Num2, Num23, Num24, Num3, Num34, Num35, Num4, Num45, Num46, Num5,
+ Num6, Num68, Num69, Num7, Num78, Num8, Num812;
NUM->{
Document{REGEXP(".")-> Num1};
Document{REGEXP("..?")-> Num12};
Document{REGEXP("..")-> Num2};
+ Document{REGEXP("...?")-> Num23};
+ Document{REGEXP("\\d{2,4}")-> Num24};
Document{REGEXP("...")-> Num3};
Document{REGEXP("....?")-> Num34};
+ Document{REGEXP("\\d{3,5}")-> Num35};
Document{REGEXP("....")-> Num4};
+ Document{REGEXP("......?")-> Num45};
+ Document{REGEXP("\\d{4,6}")-> Num46};
Document{REGEXP("......")-> Num5};
Document{REGEXP(".......")-> Num6};
+ Document{REGEXP("\\d{6,8}")-> Num68};
+ Document{REGEXP("\\d{6,9}")-> Num69};
Document{REGEXP(".......")-> Num7};
+ Document{REGEXP("........?")-> Num78};
Document{REGEXP("........")-> Num8};
+ Document{REGEXP("\\d{8,12}")-> Num812};
+
};
DECLARE LParen, RParen, Dash, Slash, Hash;
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta Fri Jun 3 09:48:41 2016
@@ -15,4 +15,6 @@ SPECIAL.ct=="^" (CAP COMMA? CAP{ENDSWITH
//W{REGEXP("Drs?", true)} PERIOD?
// @CW{-REGEXP("Done|Take|PO", true)}
// CW CW?
-// ;
\ No newline at end of file
+// ;
+
+//TODO
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta Fri Jun 3 09:48:41 2016
@@ -1,3 +1,15 @@
PACKAGE org.apache.ctakes.deid;
-DECLARE Fax;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE Fax;
+DECLARE FaxPattern;
+
+(LParen? @Num3 RParen? Dash? Num34 Dash? Dash Num34?){-> FaxPattern};
+(Num3 ANY?{PARTOF({Dash,PERIOD})} Num4 ANY{PARTOF({Dash,PERIOD})} Num3?){-> FaxPattern};
+(Num3 ANY?{PARTOF({Dash,PERIOD})} Num3 ANY{PARTOF({Dash,PERIOD})} Num4?){-> FaxPattern};
+(Num3 Num3 Num4){-> FaxPattern};
+(Num3 Num4 Num3){-> FaxPattern};
+
+W?{REGEXP("(?i)fax")} W?{REGEXP("(?i)No|Num|Number")} ANY?{PARTOF({COLON,Hash,PERIOD})}
+ @FaxPattern{-> Fax};
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta Fri Jun 3 09:48:41 2016
@@ -1,3 +1,30 @@
PACKAGE org.apache.ctakes.deid;
-DECLARE IDNum;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE IDNum;
+
+W{REGEXP("(?i)eScription")} W{REGEXP("(?i)document")} COLON
+ (@Num1 Dash Num68){-> IDNum};
+
+W{REGEXP("(?i)Member")} W{REGEXP("(?i)ID")} ANY{PARTOF({COLON,Hash})}
+ @Num812{-> IDNum};
+
+SPECIAL.ct=="_" (CAP @Num34 Slash Num35){-> IDNum};
+SPECIAL.ct=="_" (@Num45 Slash Num45){-> IDNum};
+
+(W{REGEXP("..")} COLON W{REGEXP("..")} COLON @Num35 (CW{REGEXP(".")} |(Slash Num4))?){-> IDNum};
+
+(Num2 Dash Num69){-> IDNum} PERIOD W{REGEXP("doc?")};
+
+W{REGEXP("(?i)Backjob|Voicejob|Job|Exam|Cardiology|Specimen|TR:?")} W?{REGEXP("(?i)ID|Number")} COLON
+ @Num68{-> IDNum};
+W{REGEXP("(?i)Backjob|Voicejob|Job|Exam|Cardiology|Specimen|TR:?")} W?{REGEXP("(?i)ID|Number")} COLON
+ (CAP @Num69){-> IDNum};
+
+W{REGEXP("(?i)LOT|FI|PA")} ANY?{PARTOF({Hash,COLON})}
+ @Num46{-> IDNum};
+
+(CAP{REGEXP("..")} COLON Num46 COLON Num24 ){-> IDNum};
+
+W{REGEXP("(?i)Exam")} W{REGEXP("(?i)Code")} COLON (Num23 CAP? @Num1){-> IDNum};
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta Fri Jun 3 09:48:41 2016
@@ -1,3 +1,21 @@
PACKAGE org.apache.ctakes.deid;
-DECLARE MedicalRecNum;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE MedicalRecNum;
+
+(Num3 Dash Num2 Dash Num2 (Dash Num1)?){-> MedicalRecNum};
+
+
+W{REGEXP("MRN|MR|PH|DHN|LMH|Unit|Number")} Hash? COLON? @Num78{-> MedicalRecNum};
+W{REGEXP("MRN|MR|PH|DHN|LMH|Unit|Number")} Hash? COLON? (@Num3 Num2 Num2){-> MedicalRecNum};
+
+SPECIAL.ct=="^" Num78{-> MedicalRecNum} SPECIAL.ct=="^";
+
+Num8{ENDSWITH(SplitExternal)-> MedicalRecNum};
+(CAP Num69){ENDSWITH(SplitExternal)-> MedicalRecNum};
+
+W{REGEXP("(?)Report|Unit")} W{REGEXP("(?)Number")} COLON (CAP @Num69){-> MedicalRecNum};
+
+W{REGEXP("(?)Accession")} ANY{PARTOF({Hash, COLON})}
+ (@Num45 COLON? CW{REGEXP(".")} NUM{REGEXP("\\d{4,8}")}){-> MedicalRecNum};
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta Fri Jun 3 09:48:41 2016
@@ -1,3 +1,5 @@
PACKAGE org.apache.ctakes.deid;
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
DECLARE Patient;
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd Fri Jun 3 09:48:41 2016
@@ -90,6 +90,20 @@
</xs:simpleContent>
</xs:complexType>
</xs:element>
+ <xs:element name="ID">
+ <xs:complexType>
+ <xs:simpleContent>
+ <xs:extension base="xs:string">
+ <xs:attribute type="xs:string" name="id" use="optional"/>
+ <xs:attribute type="xs:integer" name="start" use="optional"/>
+ <xs:attribute type="xs:integer" name="end" use="optional"/>
+ <xs:attribute type="xs:string" name="text" use="optional"/>
+ <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+ <xs:attribute type="xs:string" name="comment" use="optional"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+ </xs:element>
</xs:choice>
</xs:complexType>
</xs:element>