You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by pk...@apache.org on 2016/06/03 09:48:41 UTC

svn commit: r1746700 - in /ctakes/sandbox/ctakes-clinical-deid/src/main: java/org/apache/ctakes/deid/ resources/org/apache/ctakes/deid/types/ ruta/org/apache/ctakes/deid/ xsd/

Author: pkluegl
Date: Fri Jun  3 09:48:41 2016
New Revision: 1746700

URL: http://svn.apache.org/viewvc?rev=1746700&view=rev
Log:
CTAKES-384
- fixed reader
- added ID
- added more rules (ID, Fax, ...)

Modified:
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java Fri Jun  3 09:48:41 2016
@@ -39,8 +39,10 @@ import org.apache.ctakes.deid.i2b2.DeIdi
 import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
 import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
 import org.apache.ctakes.deid.type.Age;
+import org.apache.ctakes.deid.type.Contact;
 import org.apache.ctakes.deid.type.Date;
 import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.ctakes.deid.type.ID;
 import org.apache.ctakes.deid.type.Location;
 import org.apache.ctakes.deid.type.Name;
 import org.apache.ctakes.deid.type.Profession;
@@ -166,7 +168,7 @@ public class I2B2DeidCollectionReader ex
       entity.setComment(element.getComment());
     } else if (object instanceof CONTACT) {
       CONTACT element = (CONTACT) object;
-      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity = new Contact(jcas, element.getStart().intValue(), element.getEnd().intValue());
       entity.setId(element.getId());
       entity.setEntityType(element.getTYPE());
       entity.setComment(element.getComment());
@@ -176,6 +178,12 @@ public class I2B2DeidCollectionReader ex
       entity.setId(element.getId());
       entity.setEntityType(element.getTYPE());
       entity.setComment(element.getComment());
+    } else if (object instanceof org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID) {
+      org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID element = (org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.ID) object;
+      entity = new ID(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
     }
     entity.addToIndexes();
   }

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java Fri Jun  3 09:48:41 2016
@@ -48,7 +48,7 @@ public class SimpleDeidEntityComparator
 
   public static final String PARAM_CREATE_RUTA_EVAL_ANNOTATIONS = "createRutaEvalAnnotations";
 
-  private static final boolean PRINT_ANNOTATIONS = true;
+  private static final boolean PRINT_ANNOTATIONS = false;
 
   @ConfigurationParameter(name = PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, mandatory = true, defaultValue = "false")
   private Boolean createRutaEvalAnnotations;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml Fri Jun  3 09:48:41 2016
@@ -11,6 +11,11 @@
       <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
     </typeDescription>
     <typeDescription>
+      <name>org.apache.ctakes.deid.type.ID</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
       <name>org.apache.ctakes.deid.type.Age</name>
       <description/>
       <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta Fri Jun  3 09:48:41 2016
@@ -8,6 +8,8 @@ RETAINTYPE(WS);
 (Num4{-PARTOF(deid.Date),REGEXP("19..|20..")} Dash Num2 Dash Num2){-> deid.Date};
 (Num12{-PARTOF(deid.Date)} Slash Num12 Slash Num4{REGEXP("19..|20..")}){-> deid.Date};
 (Num12{-PARTOF(deid.Date)} Slash (Num12 Slash)? Num2{REGEXP("[123456789].")}){-> deid.Date};
+RETAINTYPE;
+
+(MonthInd{-PARTOF(deid.Date)} Num4{-PARTOF(deid.Date),REGEXP("19..|20..")}){-> deid.Date};
 Num4{-PARTOF(deid.Date),REGEXP("19..|20..")-> deid.Date};
 MonthInd{-PARTOF(deid.Date)-> deid.Date};
-RETAINTYPE;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Fri Jun  3 09:48:41 2016
@@ -16,6 +16,8 @@ TYPESYSTEM org.apache.ctakes.deid.StateR
 TYPESYSTEM org.apache.ctakes.deid.CountryRutaTypeSystem;
 TYPESYSTEM org.apache.ctakes.deid.FaxRutaTypeSystem;
 TYPESYSTEM org.apache.ctakes.deid.PatientRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.IDNumRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.MedicalRecNumRutaTypeSystem;
 
 SCRIPT org.apache.ctakes.deid.Dictionaries;
 SCRIPT org.apache.ctakes.deid.Age;
@@ -55,8 +57,8 @@ Age{-> Age.entityType = "AGE"};
 Country{-> Location, Location.entityType = "COUNTRY"};
 Doctor{-> Name, Name.entityType = "DOCTOR"};
 Fax{-> Contact, Contact.entityType = "FAX"};
-//IDNum{-> ID, ID.entityType = "IDNUM"};
-//MedicalRecNum{-> ID, ID.entityType = "MEDICALRECNUM"};
+IDNum{-> ID, ID.entityType = "IDNUM"};
+MedicalRecNum{-> ID, ID.entityType = "MEDICALRECNUM"};
 Patient{-> Name, Name.entityType = "PATIENT"};
 Phone{-> Contact, Contact.entityType = "PHONE"};
 State{-> Location, Location.entityType= "STATE"};

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta Fri Jun  3 09:48:41 2016
@@ -32,19 +32,30 @@ DECLARE Url, Email;
 DECLARE MDInd;
 "M\\.D\\."-> MDInd;
 
-DECLARE Num1, Num12, Num2, Num3, Num34, Num4, Num5, Num6, Num7, Num8;
+DECLARE Num1, Num12, Num2, Num23, Num24, Num3, Num34,  Num35, Num4, Num45, Num46, Num5, 
+        Num6, Num68, Num69, Num7, Num78, Num8, Num812;
 
 NUM->{
     Document{REGEXP(".")-> Num1};
 	Document{REGEXP("..?")-> Num12};
 	Document{REGEXP("..")-> Num2};
+	Document{REGEXP("...?")-> Num23};
+	Document{REGEXP("\\d{2,4}")-> Num24};
 	Document{REGEXP("...")-> Num3};
 	Document{REGEXP("....?")-> Num34};
+	Document{REGEXP("\\d{3,5}")-> Num35};
 	Document{REGEXP("....")-> Num4};
+	Document{REGEXP(".....?")-> Num45}; // 4-5 characters (4 mandatory + 1 optional), as the type name states
+	Document{REGEXP("\\d{4,6}")-> Num46};
 	Document{REGEXP("......")-> Num5};
 	Document{REGEXP(".......")-> Num6};
+	Document{REGEXP("\\d{6,8}")-> Num68};
+	Document{REGEXP("\\d{6,9}")-> Num69};
 	Document{REGEXP(".......")-> Num7};
+	Document{REGEXP("........?")-> Num78};
 	Document{REGEXP("........")-> Num8};
+	Document{REGEXP("\\d{8,12}")-> Num812};
+	
 };
 
 DECLARE LParen, RParen, Dash, Slash, Hash;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta Fri Jun  3 09:48:41 2016
@@ -15,4 +15,6 @@ SPECIAL.ct=="^" (CAP COMMA? CAP{ENDSWITH
 //W{REGEXP("Drs?", true)} PERIOD? 
 //    @CW{-REGEXP("Done|Take|PO", true)}
 //    CW CW?
-//    ;
\ No newline at end of file
+//    ;
+
+//TODO
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta Fri Jun  3 09:48:41 2016
@@ -1,3 +1,15 @@
 PACKAGE org.apache.ctakes.deid;
 
-DECLARE Fax;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE Fax;
+DECLARE FaxPattern;
+
+(LParen? @Num3 RParen? Dash? Num34 Dash? Dash Num34?){-> FaxPattern};
+(Num3 ANY?{PARTOF({Dash,PERIOD})} Num4 ANY{PARTOF({Dash,PERIOD})} Num3?){-> FaxPattern};
+(Num3 ANY?{PARTOF({Dash,PERIOD})} Num3 ANY{PARTOF({Dash,PERIOD})} Num4?){-> FaxPattern};
+(Num3 Num3 Num4){-> FaxPattern};
+(Num3 Num4 Num3){-> FaxPattern};
+
+W?{REGEXP("(?i)fax")} W?{REGEXP("(?i)No|Num|Number")} ANY?{PARTOF({COLON,Hash,PERIOD})} 
+    @FaxPattern{-> Fax};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/IDNum.ruta Fri Jun  3 09:48:41 2016
@@ -1,3 +1,30 @@
 PACKAGE org.apache.ctakes.deid;
 
-DECLARE IDNum;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE IDNum;
+
+W{REGEXP("(?i)eScription")} W{REGEXP("(?i)document")} COLON 
+    (@Num1 Dash Num68){-> IDNum};
+    
+W{REGEXP("(?i)Member")} W{REGEXP("(?i)ID")} ANY{PARTOF({COLON,Hash})} 
+    @Num812{-> IDNum};
+
+SPECIAL.ct=="_" (CAP @Num34 Slash Num35){-> IDNum};
+SPECIAL.ct=="_" (@Num45 Slash Num45){-> IDNum};
+
+(W{REGEXP("..")} COLON W{REGEXP("..")} COLON @Num35 (CW{REGEXP(".")} |(Slash Num4))?){-> IDNum};
+
+(Num2 Dash Num69){-> IDNum} PERIOD W{REGEXP("doc?")};
+
+W{REGEXP("(?i)Backjob|Voicejob|Job|Exam|Cardiology|Specimen|TR:?")} W?{REGEXP("(?i)ID|Number")} COLON 
+    @Num68{-> IDNum};
+W{REGEXP("(?i)Backjob|Voicejob|Job|Exam|Cardiology|Specimen|TR:?")} W?{REGEXP("(?i)ID|Number")} COLON 
+    (CAP @Num69){-> IDNum};
+
+W{REGEXP("(?i)LOT|FI|PA")} ANY?{PARTOF({Hash,COLON})}    
+    @Num46{-> IDNum};
+    
+(CAP{REGEXP("..")} COLON Num46 COLON Num24 ){-> IDNum};
+
+W{REGEXP("(?i)Exam")} W{REGEXP("(?i)Code")} COLON (Num23 CAP? @Num1){-> IDNum};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/MedicalRecNum.ruta Fri Jun  3 09:48:41 2016
@@ -1,3 +1,21 @@
 PACKAGE org.apache.ctakes.deid;
 
-DECLARE MedicalRecNum;
\ No newline at end of file
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE MedicalRecNum;
+
+(Num3 Dash Num2 Dash Num2 (Dash Num1)?){-> MedicalRecNum};
+
+
+W{REGEXP("MRN|MR|PH|DHN|LMH|Unit|Number")} Hash? COLON? @Num78{-> MedicalRecNum};
+W{REGEXP("MRN|MR|PH|DHN|LMH|Unit|Number")} Hash? COLON? (@Num3 Num2 Num2){-> MedicalRecNum};
+
+SPECIAL.ct=="^" Num78{-> MedicalRecNum} SPECIAL.ct=="^";
+
+Num8{ENDSWITH(SplitExternal)-> MedicalRecNum};
+(CAP Num69){ENDSWITH(SplitExternal)-> MedicalRecNum};
+
+W{REGEXP("(?i)Report|Unit")} W{REGEXP("(?i)Number")} COLON (CAP @Num69){-> MedicalRecNum}; // (?i): keywords match case-insensitively
+
+W{REGEXP("(?i)Accession")} ANY{PARTOF({Hash, COLON})} // (?i): keyword matches case-insensitively
+    (@Num45 COLON? CW{REGEXP(".")} NUM{REGEXP("\\d{4,8}")}){-> MedicalRecNum};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta Fri Jun  3 09:48:41 2016
@@ -1,3 +1,5 @@
 PACKAGE org.apache.ctakes.deid;
 
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
 DECLARE Patient;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd?rev=1746700&r1=1746699&r2=1746700&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd Fri Jun  3 09:48:41 2016
@@ -90,6 +90,20 @@
                   </xs:simpleContent>
                 </xs:complexType>
               </xs:element>
+              <xs:element name="ID">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id" use="optional"/>
+                      <xs:attribute type="xs:integer" name="start" use="optional"/>
+                      <xs:attribute type="xs:integer" name="end" use="optional"/>
+                      <xs:attribute type="xs:string" name="text" use="optional"/>
+                      <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+                      <xs:attribute type="xs:string" name="comment" use="optional"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
             </xs:choice>
           </xs:complexType>
         </xs:element>