You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2017/03/20 14:08:22 UTC
svn commit: r1787775 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/AssertionEvaluation.java medfacts/cleartk/EncoderReusingDataWriter.java
Author: tmill
Date: Mon Mar 20 14:08:22 2017
New Revision: 1787775
URL: http://svn.apache.org/viewvc?rev=1787775&view=rev
Log:
New data-writer that will reuse existing encoders so that different runs have the same feature space mapping.
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1787775&r1=1787774&r2=1787775&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon Mar 20 14:08:22 2017
@@ -40,6 +40,7 @@ import org.apache.ctakes.assertion.medfa
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG;
import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.EncoderReusingDataWriter;
import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
@@ -359,7 +360,8 @@ private static Logger logger = Logger.ge
// dw = TKLibSvmStringOutcomeDataWriter.class;
throw new UnsupportedOperationException("This requires cleartk-2.0 which");
}
- dw = LibLinearStringOutcomeDataWriter.class;
+// dw = LibLinearStringOutcomeDataWriter.class;
+ dw = EncoderReusingDataWriter.class;
AssertionEvaluation evaluation = new AssertionEvaluation(
modelsDir,
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java?rev=1787775&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java Mon Mar 20 14:08:22 2017
@@ -0,0 +1,46 @@
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.util.Scanner;
+
+import org.cleartk.ml.encoder.outcome.StringToIntegerOutcomeEncoder;
+import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
+import org.cleartk.ml.liblinear.encoder.FeatureNodeArrayEncoder;
+
+/** Data writer that reloads previously saved feature and outcome encoders from its output directory. */
+public class EncoderReusingDataWriter extends LibLinearStringOutcomeDataWriter {
+  /**
+   * Restores encoders from "encoders.ser" and "outcome-lookup.txt" when those files exist.
+   * @param outputDirectory directory passed to the superclass and searched for the two files
+   * @throws FileNotFoundException e.g. if "encoders.ser" exists but cannot be read (original error kept as cause)
+   */
+  public EncoderReusingDataWriter(File outputDirectory) throws FileNotFoundException {
+    super(outputDirectory);
+    File encoderFile = new File(outputDirectory, "encoders.ser");
+    if(encoderFile.exists()){
+      try(ObjectInputStream ois = new ObjectInputStream(new FileInputStream(encoderFile))){ // closed even on failure
+        this.setFeaturesEncoder((FeatureNodeArrayEncoder) ois.readObject());
+      } catch (ClassNotFoundException | IOException e) {
+        FileNotFoundException failure = new FileNotFoundException("Problem loading encoder from encoders.ser");
+        failure.initCause(e); // FileNotFoundException has no cause-taking constructor
+        throw failure;
+      }
+    }
+    File outputEncoderFile = new File(outputDirectory, "outcome-lookup.txt");
+    if(outputEncoderFile.exists()){
+      StringToIntegerOutcomeEncoder outcomeEncoder = new StringToIntegerOutcomeEncoder();
+      try(Scanner scanner = new Scanner(outputEncoderFile)){
+        while(scanner.hasNextLine()){
+          String line = scanner.nextLine();
+          // The second space-separated field is fed to the encoder; blank lines are skipped instead of throwing.
+          if(!line.trim().isEmpty()){ outcomeEncoder.encode(line.split(" ")[1]); }
+        }
+      }
+      this.setOutcomeEncoder(outcomeEncoder);
+    }
+  }
+}