You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by br...@apache.org on 2013/07/07 21:47:53 UTC

svn commit: r1500524 [4/4] - in /ctakes/sandbox/ctakes-scrubber-deid: csv/ data/ data/input/ data/input/cases/ data/input/cases/test/ data/input/cases/train/ data/input/phi/ data/input/phi/test/ data/input/phi/train/ data/input/pubs/ data/input/pubs/pr...

Added: ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header.txt?rev=1500524&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header.txt (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header.txt Sun Jul  7 19:47:51 2013
@@ -0,0 +1,32 @@
+@relation t_n_t
+
+@attribute conckey string
+@attribute pos {CT,POS,none,CD,NNS,JJ,LS,DT,IN,NN,VBN,NNP,CC,VBP,RB,VB,VBD,VBZ,VBG,PRP$,PRP,JJR,FW,WDT,EX,TO,WP,MD,apos,RP,WRB,SYM,NNPS,JJS,RBS,RBR,comma,UH,period,colon,PDT,WP$}
+@attribute pos_bin {Numbers,Nouns,Adjectives,com-dep-wd,Verbs,Adverbs,Pronouns,FW-Symb,apos,comma,period,unknown}
+@attribute has_capital numeric
+@attribute cnt_regex_phon numeric
+@attribute cnt_regex_date numeric
+@attribute cnt_regex_age numeric
+@attribute cnt_regex_id numeric
+@attribute cnt_regex_pat numeric
+@attribute cnt_regex_doc numeric
+@attribute cnt_regex_loc numeric
+@attribute cnt_regex_hosp numeric
+@attribute cnt_priv numeric
+@attribute cnt_hosp numeric
+@attribute cnt_name numeric
+@attribute cnt_dict_costar numeric
+@attribute cnt_dict_hl7v25 numeric
+@attribute cnt_dict_hl7v30 numeric
+@attribute cnt_dict_icd10cm numeric
+@attribute cnt_dict_icd10pcs numeric
+@attribute cnt_dict_icd9cm numeric
+@attribute cnt_dict_lnc numeric
+@attribute cnt_dict_msh numeric
+@attribute cnt_dict_rxnorm numeric
+@attribute cnt_dict_snomedct numeric
+@attribute cnt_ham_w_pos numeric
+@attribute cnt_ham_wo_pos numeric
+@attribute phi_type {NA,ID,HOSPITAL,DOCTOR,DATE,PATIENT,LOCATION,PHONE,AGE}
+
+@data

Propchange: ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header_with_crf.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header_with_crf.txt?rev=1500524&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header_with_crf.txt (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header_with_crf.txt Sun Jul  7 19:47:51 2013
@@ -0,0 +1 @@
+"1_WordToken_cap_0", "2_WordToken_cap_3", "3_WordToken_cap_1", "4_WordToken_cap_2", "5_WordToken_BaseToken_", "6_PunctuationToken_BaseToken_", "7_NumToken_BaseToken_", "8_SymbolToken_BaseToken_", "9_ContractionToken_BaseToken_", "10_RomanNumeralAnnotation_Annotation_", "11_FractionAnnotation_Annotation_", "12_DateAnnotation_Annotation_", "13_MeasurementAnnotation_Annotation_", "14_RangeAnnotation_Annotation_", "15_PersonTitleAnnotation_Annotation_", "16_TimeAnnotation_Annotation_", "17_bin_pos_Adjectives", "18_bin_pos_Adverbs", "19_bin_pos_com-dep-wd", "20_bin_pos_FW-Symb", "21_bin_pos_Nouns", "22_bin_pos_Numbers", "23_bin_pos_Pronouns", "24_bin_pos_Verbs", "25_bin_regex_address", "26_bin_regex_age", "27_bin_regex_date", "28_bin_regex_doctor", "29_bin_regex_hospital", "30_bin_regex_patient", "31_bin_regex_phone", "32_BaseToken_pos_NN", "33_BaseToken_pos_JJ", "34_BaseToken_pos_CD", "35_BaseToken_pos_IN", "36_BaseToken_pos_NNP", "37_BaseToken_pos_DT", "38_BaseToken_pos_NNS", "39_BaseT
 oken_pos_VBD", "40_BaseToken_pos_CC", "41_BaseToken_pos_VBN", "42_BaseToken_pos_RB", "43_BaseToken_pos_PRP", "44_BaseToken_pos_VB", "45_BaseToken_pos_TO", "46_BaseToken_pos_FW", "47_BaseToken_pos_PRP$", "48_BaseToken_pos_VBZ", "49_BaseToken_pos_VBG", "50_BaseToken_pos_LS", "51_BaseToken_pos_MD", "52_BaseToken_pos_WDT", "53_BaseToken_pos_VBP", "54_BaseToken_pos_EX", "55_BaseToken_pos_JJR", "56_BaseToken_pos_WP", "57_BaseToken_pos_WRB", "58_BaseToken_pos_RP", "59_BaseToken_pos_SYM", "60_BaseToken_pos_RBR", "61_BaseToken_pos_pos_tic", "62_BaseToken_pos_RBS", "63_BaseToken_pos_JJS", "64_BaseToken_pos_NNPS", "65_BaseToken_pos_UH", "66_BaseToken_pos_pos_period", "67_BaseToken_pos_PDT", "68_BaseToken_pos_pos_comma", "69_BaseToken_pos_pos_colon", "70_BaseToken_pos_WP$", "71_BaseToken_pos_pos_paren", "72_OntologyMatch_regex_DATE22", "73_OntologyMatch_regex_DATE21", "74_OntologyMatch_regex_DATE17", "75_OntologyMatch_regex_DATE18", "76_OntologyMatch_regex_DATE_SEPARATORS", "77_OntologyMatch_re
 gex_DATE1", "78_OntologyMatch_regex_DATE4", "79_OntologyMatch_regex_YEAR_CENTURY", "80_OntologyMatch_regex_SUSPICIOUS_NUM", "81_OntologyMatch_regex_MONTH", "82_OntologyMatch_regex_TELEPHONE1", "83_OntologyMatch_regex_DATE12", "84_OntologyMatch_regex_ADDRESS8", "85_OntologyMatch_regex_DOCTOR2", "86_OntologyMatch_regex_ADDRESS7", "87_OntologyMatch_regex_AGE6", "88_OntologyMatch_regex_DOCTOR_OLDER", "89_OntologyMatch_regex_DOCTOR0_1", "90_OntologyMatch_regex_DATE2", "91_OntologyMatch_regex_DOCTOR_SUBHEAD", "92_OntologyMatch_regex_HOSPITAL_1", "93_OntologyMatch_regex_DOCTOR_GEN", "94_OntologyMatch_regex_DOCTOR0", "95_OntologyMatch_regex_ACCESSION_KP", "96_OntologyMatch_regex_SSN", "97_OntologyMatch_regex_WRITTEN_AGE_1_TO_19", "98_OntologyMatch_regex_MS", "99_OntologyMatch_regex_AGE5", "100_OntologyMatch_regex_DATE7", "101_OntologyMatch_regex_DATE8", "102_OntologyMatch_regex_DATE14", "103_OntologyMatch_regex_SUSPICIOUS_NUM2", "104_OntologyMatch_regex_private", "105_OntologyMatch_regex_DO
 CTOR0_00", "106_OntologyMatch_regex_RN_KPNW", "107_OntologyMatch_regex_TELEPHONE0", "108_OntologyMatch_regex_TELEPHONE3", "109_OntologyMatch_regex_ID_I2B2_SMOK_3", "110_OntologyMatch_regex_hospital", "111_OntologyMatch_regex_MR", "112_OntologyMatch_regex_ID_I2B2_SMOK", "113_OntologyMatch_regex_DATE15", "114_OntologyMatch_regex_ID_I2B2_SMOK_2", "115_OntologyMatch_regex_TITLES", "116_OntologyMatch_regex_ROOM", "117_OntologyMatch_regex_EXTENSION", "118_OntologyMatch_regex_ADDRESS5", "119_OntologyMatch_regex_DATE13", "120_OntologyMatch_regex_HOSPITAL_2", "121_OntologyMatch_regex_ADDRESS2", "122_OntologyMatch_regex_AGE8", "123_OntologyMatch_regex_DOCTOR4", "124_OntologyMatch_regex_DOCTOR0_2", "125_OntologyMatch_regex_WRITTEN_AGE_10_100_DIV10", "126_OntologyMatch_regex_POBOX", "127_OntologyMatch_regex_DATE_23", "128_OntologyMatch_regex_AGE", "129_OntologyMatch_regex_AGE4", "130_OntologyMatch_regex_DATE16", "131_OntologyMatch_regex_ADDRESS6", "132_OntologyMatch_regex_WRITTEN_AGE_20_TO_99",
  "133_OntologyMatch_regex_DATE20", "134_OntologyMatch_regex_DOCTOR0_0", "135_OntologyMatch_regex_CC2", "136_OntologyMatch_regex_AGE7", "137_OntologyMatch_regex_TELEPHONE2", "138_OntologyMatch_regex_DATE9", "139_OntologyMatch_regex_DATE11", "140_OntologyMatch_regex_SURGEON8", "141_OntologyMatch_regex_DATE19", "142_OntologyMatch_regex_LOCATION_FLOOR_1", "143_OntologyMatch_regex_LOCATION_FLOOR_2", "144_OntologyMatch_regex_DOCTOR3", "145_OntologyMatch_regex_DATE", "146_OntologyMatch_regex_ASST3", "147_OntologyMatch_regex_ADDRESS", "148_OntologyMatch_regex_ASST2", "149_OntologyMatch_SNOMEDCT_", "150_OntologyMatch_LNC_", "151_OntologyMatch_HL7V2.5_", "152_OntologyMatch_HL7V3.0_", "153_OntologyMatch_MSH_", "154_OntologyMatch_regex_", "155_OntologyMatch_COSTAR_", "156_OntologyMatch_RXNORM_", "157_OntologyMatch_ICD9CM_", "158_OntologyMatch_ICD10CM_", "159_OntologyMatch_dict_", "160_OntologyMatch_ICD10PCS_"
\ No newline at end of file

Propchange: ctakes/sandbox/ctakes-scrubber-deid/data/models/weka_header_with_crf.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain