You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by pk...@apache.org on 2016/06/06 11:56:42 UTC

svn commit: r1746984 [3/3] - in /ctakes/sandbox/ctakes-clinical-deid/src: main/resources/wordlists/ main/ruta/org/apache/ctakes/deid/ test/java/org/apache/ctakes/deid/

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/spoken_language.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/spoken_language.txt?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/spoken_language.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/spoken_language.txt Mon Jun  6 11:56:42 2016
@@ -31,7 +31,7 @@ Burundian-speaking
 Cambodian-speaking
 Cameroonian-speaking
 Canadian-speaking
-Cape Verdean-speaking
+CapeVerdean-speaking
 Chadian-speaking
 Chilean-speaking
 Chinese-speaking
@@ -130,14 +130,14 @@ Senegalese-speaking
 Serb-speaking
 Serbian-speaking
 Seychellois-speaking
-Sierra Leonian-speaking
+SierraLeonian-speaking
 Singaporean-speaking
 Slovak-speaking
 Slovene-speaking
 Slovenian-speaking
 Somali-speaking
 Spanish-speaking
-Sri Lankan-speaking
+SriLankan-speaking
 Sudanese-speaking
 Surinamese-speaking
 Swazi-speaking
@@ -165,7 +165,7 @@ Vanuatuan-speaking
 Venezuelan-speaking
 Vietnamese-speaking
 Welsh-speaking
-Western Samoan-speaking
+WesternSamoan-speaking
 Yemeni-speaking
 Yugoslav-speaking
 Zairean-speaking

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state.txt?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state.txt Mon Jun  6 11:56:42 2016
@@ -1,14 +1,14 @@
 ALABAMA
 ALASKA
-AMERICAN SAMOA
+AMERICANSAMOA
 ARIZONA
 ARKANSAS
 CALIFORNIA
 COLORADO
 CONNECTICUT
 DELAWARE
-DISTRICT OF COLUMBIA
-FEDERATED STATES OF MICRONESIA
+DISTRICTOFCOLUMBIA
+FEDERATEDSTATESOFMICRONESIA
 FLORIDA
 GEORGIA
 GUAM
@@ -21,7 +21,7 @@ KANSAS
 KENTUCKY
 LOUISIANA
 MAINE
-MARSHALL ISLANDS
+MARSHALLISLANDS
 MARYLAND
 MASSACHUSETTS
 MICHIGAN
@@ -31,29 +31,29 @@ MISSOURI
 MONTANA
 NEBRASKA
 NEVADA
-NEW HAMPSHIRE
-NEW JERSEY
-NEW MEXICO
-NEW YORK
-NORTH CAROLINA
-NORTH DAKOTA
-NORTHERN MARIANA ISLANDS
+NEWHAMPSHIRE
+NEWJERSEY
+NEWMEXICO
+NEWYORK
+NORTHCAROLINA
+NORTHDAKOTA
+NORTHERNMARIANAISLANDS
 OHIO
 OKLAHOMA
 OREGON
 PALAU
 PENNSYLVANIA
-PUERTO RICO
-RHODE ISLAND
-SOUTH CAROLINA
-SOUTH DAKOTA
+PUERTORICO
+RHODEISLAND
+SOUTHCAROLINA
+SOUTHDAKOTA
 TENNESSEE
 TEXAS
 UTAH
 VERMONT
 VIRGINIA
-VIRGIN ISLANDS
+VIRGINISLANDS
 WASHINGTON
-WEST VIRGINIA
+WESTVIRGINIA
 WISCONSIN
 WYOMING
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state_acronym_abbreviation.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state_acronym_abbreviation.txt?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state_acronym_abbreviation.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/us_state_acronym_abbreviation.txt Mon Jun  6 11:56:42 2016
@@ -52,7 +52,7 @@ WI
 WV
 WY
 Ala
-Amer. Samoa
+Amer.Samoa
 Ariz
 Ark
 Calif
@@ -72,13 +72,13 @@ Mo
 Mont
 N.C.
 N.Dak
-N. Dak
+N.Dak
 Nebr
 Nev
 N.H.
 N.J.
 N.Mex
-N. Mex
+N.Mex
 N.Y.
 Ohio
 Okla
@@ -89,7 +89,7 @@ P.R.
 R.I.
 S.C.
 S.Dak
-S. Dak
+S.Dak
 Tenn
 Tex
 Utah
@@ -100,5 +100,5 @@ Wash
 Wis
 Wisc
 W.Va
-W. Va
+W.Va
 Wyo
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta Mon Jun  6 11:56:42 2016
@@ -11,5 +11,10 @@ RETAINTYPE(WS);
 RETAINTYPE;
 
 (MonthInd{-PARTOF(deid.Date)} Num4{-PARTOF(deid.Date),REGEXP("19..|20..")}){-> deid.Date};
-Num4{-PARTOF(deid.Date),REGEXP("19..|20..")-> deid.Date};
-MonthInd{-PARTOF(deid.Date), -PARTOF(deid.DeidEntity)-> deid.Date};
+
+RETAINTYPE(WS);
+WS @Num4{-PARTOF(deid.Date),REGEXP("19..|20..")-> deid.Date};
+WS @MonthInd{-PARTOF(deid.Date), -PARTOF(deid.DeidEntity)-> deid.Date};
+RETAINTYPE;
+
+deid.Date.ct=="may"{-> UNMARK(deid.Date)};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Mon Jun  6 11:56:42 2016
@@ -53,6 +53,7 @@ CALL(Date);
 
 
 Age{PARTOF(deid.Date) -> UNMARK(Age)};
+Fax{PARTOF(Phone) -> UNMARK(Fax)};
 
 Email{-> Contact, Contact.entityType = "EMAIL"};
 Url{-> Contact, Contact.entityType = "URL"};

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta Mon Jun  6 11:56:42 2016
@@ -31,6 +31,8 @@ TRIE(
     "dr_prefix2.txt" = DrPrefixInd2,
     trie, true, 4, false, 0, "-");
 
+KeywordInd-> {KeywordInd{-> UNMARK(KeywordInd)} ANY; ANY KeywordInd{-> UNMARK(KeywordInd)};};
+
 DECLARE Url, Email;
 "[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9- ]+)*@[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.(com|org|edu|gov|mil|co\\.uk))" -> Email;
 "(https?://)?(www.)([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?(/[a-zA-Z0-9]+)?|(https?|ftp)://[^\\s/$.?#].[^\\s]*|www.[^\\s/$.?#].[^\\s]*" -> Url;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta Mon Jun  6 11:56:42 2016
@@ -26,15 +26,15 @@ W{REGEXP("Drs?", true)} PERIOD?
 (CAP{-REGEXP("DR")} (COMMA| (CW PERIOD))? CAP CAP? CW?){-> Doctor} COMMA? DrPostfixInd;
 
 DrPrefixInd1{-> SHIFT(DrPrefixInd1,1,4)} W{REGEXP("(?i)by|for")} W{REGEXP("(?i)physician")} COLON?;
-DrPrefixInd1 (CW{-REGEXP("Dr")} CW PERIOD? CW){-> Doctor};
-DrPrefixInd1 (CW{-REGEXP("Dr")} COMMA? CW CW?){-> Doctor};
-DrPrefixInd1 (CAP{-REGEXP("DR")} ANY{PARTOF({COMMA,CW,PERIOD})} CAP PERIOD? CAP CAP?){-> Doctor};
+DrPrefixInd1 (CW{-REGEXP("Dr")} CW PERIOD? CW){-PARTOF(Doctor) -> Doctor};
+DrPrefixInd1 (CW{-REGEXP("Dr")} COMMA? CW CW?){-PARTOF(Doctor) -> Doctor};
+DrPrefixInd1 (CAP{-REGEXP("DR")} ANY{PARTOF({COMMA,CW,PERIOD})} CAP PERIOD? CAP CAP?){-PARTOF(Doctor) -> Doctor};
 
 Split (W{-> Doctor} ANY?{PARTOF({Slash,COLON,Bar})})+ SEMICOLON? (Num2 Slash)? Num69 PERIOD "doc";
 
-DrPrefixInd2{-> SHIFT(DrPrefixInd2,1,3)} W?{REGEXP("(?i)physician")} COLON;
-DrPrefixInd2 (CW{-REGEXP("Dr")} CW PERIOD? CW){-> Doctor};
-DrPrefixInd2 (CW{-REGEXP("Dr")} COMMA? CW CW?){-> Doctor};
-DrPrefixInd1 (CAP{-REGEXP("DR")} COMMA? CAP){-> Doctor};
-DrPrefixInd1 (CW{-REGEXP("Dr")}){-> Doctor};
+DrPrefixInd2 (CW{-REGEXP("Dr")} CW PERIOD? CW){-PARTOF(Doctor) -> Doctor};
+DrPrefixInd2 (CW{-REGEXP("Dr")} COMMA? CW CW?){-PARTOF(Doctor) -> Doctor};
+DrPrefixInd2 (CAP{-REGEXP("DR")} COMMA? CAP){-PARTOF(Doctor) -> Doctor};
+DrPrefixInd2 (CW{-REGEXP("Dr")}){-> Doctor};
 
+Doctor->{ANY @Doctor{-> UNMARK(Doctor)}; @Doctor{-> UNMARK(Doctor)} ANY;};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Fax.ruta Mon Jun  6 11:56:42 2016
@@ -11,5 +11,6 @@ DECLARE FaxPattern;
 (Num3 Num3 Num4){-> FaxPattern};
 (Num3 Num4 Num3){-> FaxPattern};
 
-W?{REGEXP("(?i)fax")} W?{REGEXP("(?i)No|Num|Number")} ANY?{PARTOF({COLON,Hash,PERIOD})} 
-    @FaxPattern{-> Fax};
\ No newline at end of file
+(W{REGEXP("(?i)fax")} W?{REGEXP("(?i)No|Num|Number")})? ANY?{PARTOF({COLON,Hash,PERIOD})} 
+    @FaxPattern{-> Fax};
+    

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Patient.ruta Mon Jun  6 11:56:42 2016
@@ -4,65 +4,66 @@ TYPESYSTEM org.apache.ctakes.deid.Dictio
 
 DECLARE Patient;
 
+NamePrefixInd COLON?{-ENDSWITH(SplitExternal)}
+    (
+        CW{-PARTOF(NamePrefixInd)}
+        COMMA
+        CW (CW{REGEXP(".")} PERIOD? CW?)?
+        ){-PARTOF(Patient) -> Patient};
 
 NamePrefixInd COLON?{-ENDSWITH(SplitExternal)}
     (
     CW{-PARTOF(NamePrefixInd), -REGEXP(".")}
     CW[0,3]{-REGEXP("Done")}
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
 
 NamePrefixInd COLON?{-ENDSWITH(SplitExternal)}
     (
     CW{-PARTOF(NamePrefixInd)}
     PERIOD{-ENDSWITH(Split)}
     CW CW? PERIOD?{-ENDSWITH(Split)}
-    ){-> Patient};
-
-NamePrefixInd COLON?{-ENDSWITH(SplitExternal)}
-    (
-    CW{-PARTOF(NamePrefixInd)}
-    COMMA
-    CW (CW{REGEXP(".")} PERIOD?)?
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
     
 NamePrefixInd COLON?{-ENDSWITH(SplitExternal)}
     (
     CAP COMMA? CAP (CW{REGEXP(".")} PERIOD?)?
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
     
+
+PatientPrefixInd COLON
+    (
+        CW{-PARTOF(NamePrefixInd)}
+        COMMA
+        CW (CW{REGEXP(".")} PERIOD? CW?)?
+        ){-PARTOF(Patient) -> Patient};
     
 PatientPrefixInd COLON
     (
     CW{-PARTOF(NamePrefixInd), -REGEXP(".")}
     CW[0,3]{-REGEXP("Done")}
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
 
 PatientPrefixInd COLON
     (
     CW{-PARTOF(NamePrefixInd)}
     PERIOD{-ENDSWITH(Split)}
     CW CW? PERIOD?{-ENDSWITH(Split)}
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
+
 
-PatientPrefixInd COLON
-    (
-    CW{-PARTOF(NamePrefixInd)}
-    COMMA
-    CW (CW{REGEXP(".")} PERIOD?)?
-    ){-> Patient};
     
 PatientPrefixInd COLON
     (
     CAP COMMA? CAP (CW{REGEXP(".")} PERIOD?)?
-    ){-> Patient};
+    ){-PARTOF(Patient) -> Patient};
 
 W.ct=="seeing" 
     (CW{-REGEXP("(?i)Done|Dr|Mr|Mrs|Miss|Ms|Pt|Patients")}  
-    CW?{-REGEXP("Done")}){-> Patient};
+    CW?{-REGEXP("Done")}){-PARTOF(Patient) -> Patient};
 
 RETAINTYPE(BREAK);
-BREAK BREAK (@CAP COMMA? CAP ((CW PERIOD)|(CW COMMA? CW))?){-> Patient}
+BREAK BREAK (@CAP COMMA? CAP ((CW PERIOD)|(CW COMMA? CW))?){-PARTOF(Patient) -> Patient}
     BREAK BREAK? Num78 BREAK;  
 RETAINTYPE;  
 
-W{REGEXP("Mr|Mrs|Ms|Miss")} PERIOD? W{-REGEXP("(?i)take|pt") -> Patient};
\ No newline at end of file
+W{REGEXP("Mr|Mrs|Ms|Miss")} PERIOD? W{-REGEXP("(?i)take|pt"), -PARTOF(Patient) -> Patient};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/State.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/State.ruta?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/State.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/State.ruta Mon Jun  6 11:56:42 2016
@@ -10,16 +10,16 @@ State CW{->UNMARK(State)};
 State{-> UNMARK(State)} W{REGEXP("hospital|heart|association|medical|care|nursing|avenue|street|road|drive|boulevard", true)};
 
 //getSTATE3 
-"originally" "from" W[0,3]{-REGEXP("from")} @StateContext{-> State};
+"originally" "from" W[0,3]{-REGEXP("from")} @StateContext{-PARTOF(State) -> State};
 // TODO refactor to wordlist?
 W{REGEXP("home|son|daughter|mother|father|raised|grew|lived|lives", true)} W[0,3]?
-    W{REGEXP("in")} W[0,3]{-REGEXP("in")} @StateContext{->State};
+    W{REGEXP("in")} W[0,3]{-REGEXP("in")} @StateContext{-PARTOF(State) -> State};
 
 //getSTATE4
-CW (CW? COMMA) @StateContext{-> State};
+CW (CW? COMMA) @StateContext{-PARTOF(State) -> State};
 
 //getSTATE6
 RETAINTYPE(BREAK);
-BREAK CW COMMA W{REGEXP("MD|IN|AS|DE|HI|OR") -> State} NUM{REGEXP(".{5}")};
+BREAK CW COMMA W{REGEXP("MD|IN|AS|DE|HI|OR"), -PARTOF(State) -> State} NUM{REGEXP(".{5}")};
 RETAINTYPE;
 

Modified: ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java?rev=1746984&r1=1746983&r2=1746984&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java Mon Jun  6 11:56:42 2016
@@ -32,7 +32,7 @@ import org.apache.uima.ruta.engine.RutaE
 
 public class I2B2Evaluation {
 
-  private static final boolean DEBUG = true;
+  private static final boolean DEBUG = false;
 
   public static void main(String[] args)
           throws ResourceInitializationException, UIMAException, IOException {