You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2016/01/28 14:32:06 UTC

svn commit: r1727344 - in /ctakes/sandbox/ctakes-clinical-deid: pom.xml src/main/resources/META-INF/org.apache.uima.fit/types.txt src/main/ruta/org/apache/ctakes/deid/Deid.ruta src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java

Author: chenpei
Date: Thu Jan 28 13:32:06 2016
New Revision: 1727344

URL: http://svn.apache.org/viewvc?rev=1727344&view=rev
Log:
CTAKES-384 Applying patch. Thanks Peter Klugl.

Modified:
    ctakes/sandbox/ctakes-clinical-deid/pom.xml
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java

Modified: ctakes/sandbox/ctakes-clinical-deid/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/pom.xml?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/pom.xml (original)
+++ ctakes/sandbox/ctakes-clinical-deid/pom.xml Thu Jan 28 13:32:06 2016
@@ -11,10 +11,23 @@
   </parent>
 
   <properties>
-<!--     <ruta-version>2.4.0-SNAPSHOT</ruta-version> -->
-    <ruta-version>2.3.1</ruta-version>
+    <ruta-version>2.4.0</ruta-version>
+<!--     <ruta-version>2.3.1</ruta-version> -->
   </properties>
 
+  <!-- Use ruta-2.4.0-rc2 for now, until it's released -->
+  <repositories>
+    <repository>
+      <id>staged-release</id>
+      <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
+    </repository>
+  </repositories>
+  <pluginRepositories>
+    <pluginRepository>
+      <id>staged-release</id>
+      <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
+    </pluginRepository>
+  </pluginRepositories>
 
   <dependencies>
     <dependency>
@@ -26,6 +39,11 @@
       <artifactId>ruta-core</artifactId>
       <version>${ruta-version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.uima</groupId>
+      <artifactId>ruta-core-ext</artifactId>
+      <version>${ruta-version}</version>
+    </dependency>
   </dependencies>
   
   
@@ -154,31 +172,24 @@
 
             </configuration>
           </execution>
-<!--           <execution> -->
-<!--             <id>mtwl</id> -->
-<!--             <phase>generate-resources</phase> -->
-<!--             <goals> -->
-<!--               <goal>mtwl</goal> -->
-<!--             </goals> -->
-<!--             <configuration> -->
-<!--               Compress resulting tree word list. -->
-<!--               default value: true -->
-<!--               <compress>true</compress> -->
-<!--               The source files for the multi tree word list. -->
-<!--               default value: none -->
-<!--               <inputFiles> -->
-<!--                 <directory>${basedir}/src/main/resources/wordlists</directory> -->
-<!--                 <includes> -->
-<!--                   <include>*.txt</include> -->
-<!--                 </includes> -->
-<!--               </inputFiles> -->
-<!--               default value: ${project.build.directory}/generated-sources/ruta/resources/generated.mtwl -->
-<!--               <outputFile>${project.build.directory}/generated-sources/ruta/resources/generated.mtwl</outputFile> -->
-<!--               Source file encoding. -->
-<!--               default value: ${project.build.sourceEncoding} -->
-<!--               <encoding>UTF-8</encoding> -->
-<!--             </configuration> -->
-<!--           </execution> -->
+          <execution>
+            <id>mtwl</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>mtwl</goal>
+            </goals>
+            <configuration>
+              <compress>false</compress>
+              <inputFiles>
+                <directory>${basedir}/src/main/resources/wordlists</directory>
+                <includes>
+                  <include>*.txt</include>
+                </includes>
+              </inputFiles>
+              <outputFile>${project.build.directory}/generated-sources/ruta/resources/generated.mtwl</outputFile>
+              <encoding>UTF-8</encoding>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
     </plugins>

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt Thu Jan 28 13:32:06 2016
@@ -15,4 +15,5 @@ classpath*:org/apache/ctakes/coreference
 classpath*:org/apache/ctakes/drugner/types/TypeSystem.xml
 classpath*:org/apache/ctakes/padtermspotter/types/TypeSystem.xml
 classpath*:org/apache/ctakes/smokingstatus/types/TypeSystem.xml
-classpath*:org/apache/ctakes/sideeffect/types/TypeSystem.xml
\ No newline at end of file
+classpath*:org/apache/ctakes/sideeffect/types/TypeSystem.xml
+classpath*:org/apache/ctakes/deid/DeidRutaTypeSystem.xml
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Thu Jan 28 13:32:06 2016
@@ -1,10 +1,22 @@
 PACKAGE org.apache.ctakes.deid;
 
 TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+SCRIPT org.apache.ctakes.deid.Dictionaries;
+SCRIPT org.apache.ctakes.deid.ZipState;
+SCRIPT org.apache.ctakes.deid.Street;
+SCRIPT org.apache.ctakes.deid.UserName;
 
-CW{-> IdentifiedAnnotation};
-PACKAGE org.apache.ctakes.deid;
+CALL(Dictionaries);
+CALL(ZipState);
+CALL(Street);
+CALL(UserName);
 
-TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+// map types of ruta scripts to cTAKES types
+// TODO select the correct types and fill the features
+Zip{-> IdentifiedAnnotation};
+State{-> IdentifiedAnnotation};
+Email{-> IdentifiedAnnotation};
+Url{-> IdentifiedAnnotation};
+Street{-> IdentifiedAnnotation};
+UserName{-> IdentifiedAnnotation};
 
-CW{-> IdentifiedAnnotation};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java Thu Jan 28 13:32:06 2016
@@ -18,11 +18,13 @@
  */
 package org.apache.ctakes.deid;
 
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URL;
 import java.util.Collection;
 
 import junit.framework.Assert;
 
-import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -34,77 +36,40 @@ import org.junit.Test;
 
 public class DeidPipelineTest {
 
-  @Test
-  public void test() throws Exception {
-    String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
-
-    String document = "Hello World!";
-    JCas jcas = JCasFactory.createJCas();
-    jcas.setDocumentText(document);
-
-    AggregateBuilder builder = new AggregateBuilder();
-    builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
-    builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath(descriptorPath));
-
-    SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
-
-    Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
-    Assert.assertEquals(2, select.size());
-  }
-
-}
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.deid;
-
-import java.util.Collection;
-
-import junit.framework.Assert;
-
-import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.uima.fit.factory.AggregateBuilder;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.jcas.JCas;
-import org.junit.Test;
-
-public class DeidPipelineTest {
+  private String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
 
   @Test
-  public void test() throws Exception {
-    String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
+  public void testExamples() throws Exception {
 
-    String document = "Hello World!";
-    JCas jcas = JCasFactory.createJCas();
-    jcas.setDocumentText(document);
+    URL examplesResource = getClass().getResource("examples.csv");
+    BufferedReader in = new BufferedReader(new InputStreamReader(examplesResource.openStream()));
 
+    JCas jcas = JCasFactory.createJCas();
     AggregateBuilder builder = new AggregateBuilder();
-    builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
+    // not needed right now
+    // builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
     builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath(descriptorPath));
 
-    SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
+    String line = null;
+    while ((line = in.readLine()) != null) {
+      String[] split = line.split(";");
+      String documentText = split[0];
+      jcas.reset();
+      jcas.setDocumentText(documentText);
+
+      SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
+      Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
+      Assert.assertEquals(documentText, split.length - 1, select.size());
+      int counter = 1;
+      for (IdentifiedAnnotation identifiedAnnotation : select) {
+        String actual = identifiedAnnotation.getCoveredText();
+        String expected = split[counter];
+        Assert.assertEquals(expected, actual);
+        counter++;
+      }
+    }
 
-    Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
-    Assert.assertEquals(2, select.size());
+    in.close();
   }
 
 }