You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/05/03 17:28:07 UTC

svn commit: r1742129 - in /ctakes/sandbox/ctakes-wsd: ./ .settings/ src/main/java/org/apache/ctakes/consumers/ src/main/java/org/apache/ctakes/pipelines/

Author: dligach
Date: Tue May  3 15:28:06 2016
New Revision: 1742129

URL: http://svn.apache.org/viewvc?rev=1742129&view=rev
Log:
pipeline for printing gold events plus a few other minor updates

Added:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/EventPrinter.java
Modified:
    ctakes/sandbox/ctakes-wsd/.classpath
    ctakes/sandbox/ctakes-wsd/.settings/org.eclipse.jdt.core.prefs
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/DataForWord2Vec.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/MimicWord2VecPreprocessing.java

Modified: ctakes/sandbox/ctakes-wsd/.classpath
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/.classpath?rev=1742129&r1=1742128&r2=1742129&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/.classpath (original)
+++ ctakes/sandbox/ctakes-wsd/.classpath Tue May  3 15:28:06 2016
@@ -27,7 +27,7 @@
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>
 	</classpathentry>
-	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
 		<attributes>
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>

Modified: ctakes/sandbox/ctakes-wsd/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/.settings/org.eclipse.jdt.core.prefs?rev=1742129&r1=1742128&r2=1742129&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/.settings/org.eclipse.jdt.core.prefs (original)
+++ ctakes/sandbox/ctakes-wsd/.settings/org.eclipse.jdt.core.prefs Tue May  3 15:28:06 2016
@@ -1,5 +1,5 @@
 eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
-org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
 org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.7
+org.eclipse.jdt.core.compiler.source=1.8

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/DataForWord2Vec.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/DataForWord2Vec.java?rev=1742129&r1=1742128&r2=1742129&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/DataForWord2Vec.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/DataForWord2Vec.java Tue May  3 15:28:06 2016
@@ -75,7 +75,9 @@ public class DataForWord2Vec {
 
 		  for(BaseToken token : JCasUtil.select(systemView, BaseToken.class)) { 
 		    String stringValue = tokenToString(token);
-		    System.out.print(stringValue + " ");
+		    if(stringValue != null) {
+		      System.out.print(stringValue + " ");
+		    }
 		  }
 		}
 		
@@ -93,7 +95,7 @@ public class DataForWord2Vec {
 		      stringValue = tokenText;
 		      break;
 		    case "NewlineToken":
-		      stringValue = "";
+		      stringValue = null;
 		      break;
 		    case "NumToken":
 		      stringValue = "number_token";

Added: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/EventPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/EventPrinter.java?rev=1742129&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/EventPrinter.java (added)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/EventPrinter.java Tue May  3 15:28:06 2016
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.consumers;
+
+import java.io.File;
+
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.Utils;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/**
+ * Read cTAKES annotations from XMI files.
+ *  
+ * @author dmitriy dligach
+ */
+public class EventPrinter {
+  
+  static interface Options {
+
+    @Option(
+        longName = "xmi-dir",
+        description = "path to xmi files")
+    public File getInputDirectory();
+  }
+  
+	public static void main(String[] args) throws Exception {
+		  
+		Options options = CliFactory.parseArguments(Options.class, args);
+    CollectionReader collectionReader = Utils.getCollectionReader(options.getInputDirectory());
+    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(RelationContextPrinter.class);
+		SimplePipeline.runPipeline(collectionReader, annotationConsumer);
+	}
+
+  /**
+   * Print each sentence on a single line (line breaks replaced by spaces).
+   *  
+   * @author dmitriy dligach
+   */
+  public static class RelationContextPrinter extends JCasAnnotator_ImplBase {
+    
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      
+      for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+        String withLineBreaks = sentence.getCoveredText();
+        String noLineBreaks = withLineBreaks.replace("\n", " ");
+        System.out.println(" * " + noLineBreaks);
+        System.out.println();
+      }
+    }
+  }
+}
+
+  
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java?rev=1742129&r1=1742128&r2=1742129&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java Tue May  3 15:28:06 2016
@@ -19,11 +19,15 @@
 package org.apache.ctakes.consumers;
 
 import java.io.File;
+import java.util.List;
 
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.ctakes.utils.Utils;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -40,7 +44,7 @@ import com.lexicalscope.jewel.cli.Option
  * @author dmitriy dligach
  */
 public class SentencePrinter {
-  
+
   static interface Options {
 
     @Option(
@@ -48,33 +52,51 @@ public class SentencePrinter {
         description = "path to xmi files")
     public File getInputDirectory();
   }
-  
-	public static void main(String[] args) throws Exception {
-		  
-		Options options = CliFactory.parseArguments(Options.class, args);
+
+  public static void main(String[] args) throws Exception {
+
+    Options options = CliFactory.parseArguments(Options.class, args);
     CollectionReader collectionReader = Utils.getCollectionReader(options.getInputDirectory());
-    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(RelationContextPrinter.class);
-		SimplePipeline.runPipeline(collectionReader, annotationConsumer);
-	}
+    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(EventPrinter.class);
+    SimplePipeline.runPipeline(collectionReader, annotationConsumer);
+  }
 
   /**
    * Print events and entities.
    *  
    * @author dmitriy dligach
    */
-  public static class RelationContextPrinter extends JCasAnnotator_ImplBase {
-    
+  public static class EventPrinter extends JCasAnnotator_ImplBase {
+
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
-      
-      for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
-        String withLineBreaks = sentence.getCoveredText();
-        String noLineBreaks = withLineBreaks.replace("\n", " ");
-        System.out.println(" * " + noLineBreaks);
+
+      // gold EventMention(s) are all in gold view
+      JCas goldView;
+      try {
+        goldView = jCas.getView("GoldView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      // system view has sentence segmentation, tokens, and dictionary lookup events
+      JCas systemView;
+      try {
+        systemView = jCas.getView("_InitialView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
+        for(BaseToken baseToken : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
+          List<EventMention> events = JCasUtil.selectCovering(goldView, EventMention.class, baseToken.getBegin(), baseToken.getEnd());
+          if(events.size() > 0) {
+            System.out.println("[" + baseToken.getCoveredText() + "] ");
+          }
+          System.out.print(baseToken.getCoveredText() + " ");
+        }
         System.out.println();
       }
     }
   }
 }
-
-  
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/MimicWord2VecPreprocessing.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/MimicWord2VecPreprocessing.java?rev=1742129&r1=1742128&r2=1742129&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/MimicWord2VecPreprocessing.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/MimicWord2VecPreprocessing.java Tue May  3 15:28:06 2016
@@ -36,8 +36,8 @@ import com.google.common.io.CharStreams;
 
 public class MimicWord2VecPreprocessing {
 
-  public static File inputDirectory = new File("/Users/Dima/Loyola/Data/MimicIII/Text/0/");
-  public static String outputDirectory = "/Users/Dima/Loyola/Data/MimicIII/Xmi/0/";
+  public static File inputDirectory = new File("/Users/Dima/Loyola/Data/MimicIII/Text/4/");
+  public static String outputDirectory = "/Users/Dima/Loyola/Data/MimicIII/Xmi/4/";
 
   public static void main(String[] args) throws Exception {