You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2017/03/20 14:08:22 UTC

svn commit: r1787775 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: eval/AssertionEvaluation.java medfacts/cleartk/EncoderReusingDataWriter.java

Author: tmill
Date: Mon Mar 20 14:08:22 2017
New Revision: 1787775

URL: http://svn.apache.org/viewvc?rev=1787775&view=rev
Log:
New data-writer that will reuse existing encoders so that different runs have the same feature space mapping.

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1787775&r1=1787774&r2=1787775&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon Mar 20 14:08:22 2017
@@ -40,6 +40,7 @@ import org.apache.ctakes.assertion.medfa
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG;
 import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.EncoderReusingDataWriter;
 import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
@@ -359,7 +360,8 @@ private static Logger logger = Logger.ge
 //        dw = TKLibSvmStringOutcomeDataWriter.class;
       throw new UnsupportedOperationException("This requires cleartk-2.0 which");
     }
-    dw = LibLinearStringOutcomeDataWriter.class;
+//    dw = LibLinearStringOutcomeDataWriter.class;
+    dw = EncoderReusingDataWriter.class;
     
     AssertionEvaluation evaluation = new AssertionEvaluation(
         modelsDir,

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java?rev=1787775&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java Mon Mar 20 14:08:22 2017
@@ -0,0 +1,78 @@
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.util.Scanner;
+
+import org.cleartk.ml.encoder.outcome.StringToIntegerOutcomeEncoder;
+import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
+import org.cleartk.ml.liblinear.encoder.FeatureNodeArrayEncoder;
+
+/**
+ * Data writer that reuses encoder files already present in the output directory,
+ * so that separate runs share the same feature space and outcome mapping.
+ * <p>
+ * If {@code encoders.ser} exists, it is deserialized and installed as the
+ * features encoder. If {@code outcome-lookup.txt} exists, the second
+ * space-separated field of each line is fed, in file order, to a fresh
+ * {@link StringToIntegerOutcomeEncoder} that is then installed as the outcome
+ * encoder. When a file is absent, the corresponding encoder is not replaced.
+ */
+public class EncoderReusingDataWriter extends LibLinearStringOutcomeDataWriter {
+
+  /**
+   * Creates the writer and loads any encoders found in {@code outputDirectory}.
+   *
+   * @param outputDirectory directory handed to the superclass and searched for
+   *          {@code encoders.ser} and {@code outcome-lookup.txt}
+   * @throws FileNotFoundException if {@code encoders.ser} exists but cannot be
+   *           read or deserialized (the original exception is attached as the
+   *           cause), or if {@code outcome-lookup.txt} cannot be opened
+   * @throws IllegalArgumentException if a non-blank line of
+   *           {@code outcome-lookup.txt} has no second field
+   */
+  public EncoderReusingDataWriter(File outputDirectory)
+      throws FileNotFoundException {
+    super(outputDirectory);
+    File encoderFile = new File(outputDirectory, "encoders.ser");
+    if(encoderFile.exists()){
+      // Both streams are declared as resources so they are closed even when
+      // the ObjectInputStream constructor or readObject() throws.
+      try (FileInputStream fis = new FileInputStream(encoderFile);
+           ObjectInputStream ois = new ObjectInputStream(fis)) {
+        this.setFeaturesEncoder((FeatureNodeArrayEncoder) ois.readObject());
+      } catch (ClassNotFoundException | IOException e) {
+        // FileNotFoundException has no cause-taking constructor, so the
+        // underlying failure is attached with initCause() instead of printed.
+        FileNotFoundException wrapped =
+            new FileNotFoundException("Problem loading encoder from encoders.ser");
+        wrapped.initCause(e);
+        throw wrapped;
+      }
+    }
+
+    File outputEncoderFile = new File(outputDirectory, "outcome-lookup.txt");
+    if(outputEncoderFile.exists()){
+      StringToIntegerOutcomeEncoder outcomeEncoder = new StringToIntegerOutcomeEncoder();
+      try(Scanner scanner = new Scanner(outputEncoderFile)){
+        while(scanner.hasNextLine()){
+          String line = scanner.nextLine();
+          if(line.trim().isEmpty()){
+            // A blank line carries no outcome; skip it rather than fail below.
+            continue;
+          }
+          // NOTE(review): presumably each line is "<index> <outcome>"; only the
+          // second field is used here - confirm against the code that writes the file.
+          String[] ind_val = line.split(" ");
+          if(ind_val.length < 2){
+            throw new IllegalArgumentException(
+                "Malformed line in outcome-lookup.txt: " + line);
+          }
+          outcomeEncoder.encode(ind_val[1]);
+        }
+      }
+      this.setOutcomeEncoder(outcomeEncoder);
+    }
+  }
+
+}