You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/01/01 19:16:26 UTC

svn commit: r1554666 - in /opennlp/addons/liblinear-addon/src/main/java: LiblinearModel.java LiblinearTrainer.java

Author: joern
Date: Wed Jan  1 18:16:26 2014
New Revision: 1554666

URL: http://svn.apache.org/r1554666
Log:
OPENNLP-624 Fixed a compilation error; the model can now be loaded and serialized

Modified:
    opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java
    opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java

Modified: opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java?rev=1554666&r1=1554665&r2=1554666&view=diff
==============================================================================
--- opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java (original)
+++ opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java Wed Jan  1 18:16:26 2014
@@ -17,16 +17,22 @@
  * under the License.
  */
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.SerializableArtifact;
 import de.bwaldvogel.liblinear.Feature;
 import de.bwaldvogel.liblinear.FeatureNode;
@@ -41,6 +47,8 @@ import de.bwaldvogel.liblinear.Model;
 
 public class LiblinearModel implements MaxentModel, SerializableArtifact {
 
+  private static final Charset LIBLINEAR_MODEL_ENCODING = Charset.forName("UTF-8");
+  
   private Model model;
   
   // Lets read them from disk, when model is loaded ... 
@@ -54,7 +62,33 @@ public class LiblinearModel implements M
   }
 
   public LiblinearModel(InputStream in) throws IOException {
-    model = Linear.loadModel(new InputStreamReader(in));
+    // Stream layout, mirroring serialize(OutputStream): model byte length and
+    // model bytes, outcome label count and labels, predicate map size and entries.
+    DataInputStream di = new DataInputStream(in);
+    
+    int modelByteLength = di.readInt();
+    
+    // TODO: We should have a fixed memory limit here ...
+    
+    byte modelBytes[] = new byte[modelByteLength];
+    // read(byte[]) may return after a partial read; readFully blocks until the
+    // whole array is filled and throws EOFException on a truncated stream.
+    di.readFully(modelBytes);
+    
+    int outcomeLabelLength = di.readInt();
+    outcomeLabels = new String[outcomeLabelLength];
+    for (int i = 0; i < outcomeLabelLength; i++) {
+      outcomeLabels[i] = di.readUTF();
+    }
+    
+    predMap = new HashMap<String, Integer>();
+    int predMapSize = di.readInt();
+    for (int i = 0; i < predMapSize; i++) {
+      String key = di.readUTF();
+      int value = di.readInt();
+      predMap.put(key, value);
+    }
+    model = Linear.loadModel(new InputStreamReader(new ByteArrayInputStream(modelBytes), LIBLINEAR_MODEL_ENCODING));
   }
 
   public double[] eval(String[] features) {
@@ -134,10 +168,33 @@ public class LiblinearModel implements M
 
   public void serialize(OutputStream out) throws IOException {
     
+    DataOutputStream ds = new DataOutputStream(out);
+    // Layout: [int length][model bytes][int count][labels][int size][key, value pairs].
+    ByteArrayOutputStream modelBytes = new ByteArrayOutputStream();
+    Linear.saveModel(new OutputStreamWriter(modelBytes, LIBLINEAR_MODEL_ENCODING), model);
+    // NOTE(review): size() is only complete if saveModel flushed the writer - confirm.
+    ds.writeInt(modelBytes.size());
+    ds.write(modelBytes.toByteArray());
+    
+    // write string array
+    // write label count
+    ds.writeInt(outcomeLabels.length);
+    
+    // write each label
+    for (String outcomeLabel : outcomeLabels) {
+      ds.writeUTF(outcomeLabel);
+    }
+
+    // write entry count
+    ds.writeInt(predMap.size());
+    for (Map.Entry<String, Integer> entry : predMap.entrySet()) {
+      ds.writeUTF(entry.getKey());
+      ds.writeInt(entry.getValue());
+    }
   }
-  
-  public Class<?> getSerializerClass() {
+
+  @Override
+  public Class<?> getArtifactSerializerClass() {
     return LiblinearModelSerializer.class;
   }
-
 }

Modified: opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java?rev=1554666&r1=1554665&r2=1554666&view=diff
==============================================================================
--- opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java (original)
+++ opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java Wed Jan  1 18:16:26 2014
@@ -69,7 +69,7 @@ public class LiblinearTrainer extends Ab
       problem.x[i] = vx.get(i);
 
       if (bias >= 0) {
-        problem.x[i][problem.x[i].length - 1] = new FeatureNode(max_index + 1, bias);
+        problem.x[i][problem.x[i].length - 1] = new FeatureNode(maxIndex + 1, bias);
       }
     }