You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2016/01/28 14:32:06 UTC
svn commit: r1727344 - in /ctakes/sandbox/ctakes-clinical-deid: pom.xml
src/main/resources/META-INF/org.apache.uima.fit/types.txt
src/main/ruta/org/apache/ctakes/deid/Deid.ruta
src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
Author: chenpei
Date: Thu Jan 28 13:32:06 2016
New Revision: 1727344
URL: http://svn.apache.org/viewvc?rev=1727344&view=rev
Log:
CTAKES-384 Applying patch. Thanks Peter Klugl.
Modified:
ctakes/sandbox/ctakes-clinical-deid/pom.xml
ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
Modified: ctakes/sandbox/ctakes-clinical-deid/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/pom.xml?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/pom.xml (original)
+++ ctakes/sandbox/ctakes-clinical-deid/pom.xml Thu Jan 28 13:32:06 2016
@@ -11,10 +11,23 @@
</parent>
<properties>
-<!-- <ruta-version>2.4.0-SNAPSHOT</ruta-version> -->
- <ruta-version>2.3.1</ruta-version>
+ <ruta-version>2.4.0</ruta-version>
+<!-- <ruta-version>2.3.1</ruta-version> -->
</properties>
+ <!-- use ruta-2.4.0-rc2 for now until it's released -->
+ <repositories>
+ <repository>
+ <id>staged-release</id>
+ <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
+ </repository>
+ </repositories>
+ <pluginRepositories>
+ <pluginRepository>
+ <id>staged-release</id>
+ <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
+ </pluginRepository>
+ </pluginRepositories>
<dependencies>
<dependency>
@@ -26,6 +39,11 @@
<artifactId>ruta-core</artifactId>
<version>${ruta-version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.uima</groupId>
+ <artifactId>ruta-core-ext</artifactId>
+ <version>${ruta-version}</version>
+ </dependency>
</dependencies>
@@ -154,31 +172,24 @@
</configuration>
</execution>
-<!-- <execution> -->
-<!-- <id>mtwl</id> -->
-<!-- <phase>generate-resources</phase> -->
-<!-- <goals> -->
-<!-- <goal>mtwl</goal> -->
-<!-- </goals> -->
-<!-- <configuration> -->
-<!-- Compress resulting tree word list. -->
-<!-- default value: true -->
-<!-- <compress>true</compress> -->
-<!-- The source files for the multi tree word list. -->
-<!-- default value: none -->
-<!-- <inputFiles> -->
-<!-- <directory>${basedir}/src/main/resources/wordlists</directory> -->
-<!-- <includes> -->
-<!-- <include>*.txt</include> -->
-<!-- </includes> -->
-<!-- </inputFiles> -->
-<!-- default value: ${project.build.directory}/generated-sources/ruta/resources/generated.mtwl -->
-<!-- <outputFile>${project.build.directory}/generated-sources/ruta/resources/generated.mtwl</outputFile> -->
-<!-- Source file encoding. -->
-<!-- default value: ${project.build.sourceEncoding} -->
-<!-- <encoding>UTF-8</encoding> -->
-<!-- </configuration> -->
-<!-- </execution> -->
+ <execution>
+ <id>mtwl</id>
+ <phase>generate-resources</phase>
+ <goals>
+ <goal>mtwl</goal>
+ </goals>
+ <configuration>
+ <compress>false</compress>
+ <inputFiles>
+ <directory>${basedir}/src/main/resources/wordlists</directory>
+ <includes>
+ <include>*.txt</include>
+ </includes>
+ </inputFiles>
+ <outputFile>${project.build.directory}/generated-sources/ruta/resources/generated.mtwl</outputFile>
+ <encoding>UTF-8</encoding>
+ </configuration>
+ </execution>
</executions>
</plugin>
</plugins>
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt Thu Jan 28 13:32:06 2016
@@ -15,4 +15,5 @@ classpath*:org/apache/ctakes/coreference
classpath*:org/apache/ctakes/drugner/types/TypeSystem.xml
classpath*:org/apache/ctakes/padtermspotter/types/TypeSystem.xml
classpath*:org/apache/ctakes/smokingstatus/types/TypeSystem.xml
-classpath*:org/apache/ctakes/sideeffect/types/TypeSystem.xml
\ No newline at end of file
+classpath*:org/apache/ctakes/sideeffect/types/TypeSystem.xml
+classpath*:org/apache/ctakes/deid/DeidRutaTypeSystem.xml
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Thu Jan 28 13:32:06 2016
@@ -1,10 +1,22 @@
PACKAGE org.apache.ctakes.deid;
TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+SCRIPT org.apache.ctakes.deid.Dictionaries;
+SCRIPT org.apache.ctakes.deid.ZipState;
+SCRIPT org.apache.ctakes.deid.Street;
+SCRIPT org.apache.ctakes.deid.UserName;
-CW{-> IdentifiedAnnotation};
-PACKAGE org.apache.ctakes.deid;
+CALL(Dictionaries);
+CALL(ZipState);
+CALL(Street);
+CALL(UserName);
-TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+// map types of ruta scripts to cTAKES types
+// TODO select the correct types and fill the features
+Zip{-> IdentifiedAnnotation};
+State{-> IdentifiedAnnotation};
+Email{-> IdentifiedAnnotation};
+Url{-> IdentifiedAnnotation};
+Street{-> IdentifiedAnnotation};
+UserName{-> IdentifiedAnnotation};
-CW{-> IdentifiedAnnotation};
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java?rev=1727344&r1=1727343&r2=1727344&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java Thu Jan 28 13:32:06 2016
@@ -18,11 +18,13 @@
*/
package org.apache.ctakes.deid;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URL;
import java.util.Collection;
import junit.framework.Assert;
-import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -34,77 +36,40 @@ import org.junit.Test;
public class DeidPipelineTest {
- @Test
- public void test() throws Exception {
- String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
-
- String document = "Hello World!";
- JCas jcas = JCasFactory.createJCas();
- jcas.setDocumentText(document);
-
- AggregateBuilder builder = new AggregateBuilder();
- builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
- builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath(descriptorPath));
-
- SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
-
- Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
- Assert.assertEquals(2, select.size());
- }
-
-}
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.deid;
-
-import java.util.Collection;
-
-import junit.framework.Assert;
-
-import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.uima.fit.factory.AggregateBuilder;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.fit.pipeline.SimplePipeline;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.jcas.JCas;
-import org.junit.Test;
-
-public class DeidPipelineTest {
+ private String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
@Test
- public void test() throws Exception {
- String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
+ public void testExamples() throws Exception {
- String document = "Hello World!";
- JCas jcas = JCasFactory.createJCas();
- jcas.setDocumentText(document);
+ URL examplesResource = getClass().getResource("examples.csv");
+ BufferedReader in = new BufferedReader(new InputStreamReader(examplesResource.openStream()));
+ JCas jcas = JCasFactory.createJCas();
AggregateBuilder builder = new AggregateBuilder();
- builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
+ // not needed right now
+ // builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath(descriptorPath));
- SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
+ String line = null;
+ while ((line = in.readLine()) != null) {
+ String[] split = line.split(";");
+ String documentText = split[0];
+ jcas.reset();
+ jcas.setDocumentText(documentText);
+
+ SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
+ Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
+ Assert.assertEquals(documentText, split.length - 1, select.size());
+ int counter = 1;
+ for (IdentifiedAnnotation identifiedAnnotation : select) {
+ String actual = identifiedAnnotation.getCoveredText();
+ String expected = split[counter];
+ Assert.assertEquals(expected, actual);
+ counter++;
+ }
+ }
- Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
- Assert.assertEquals(2, select.size());
+ in.close();
}
}