You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/12/03 16:09:58 UTC

svn commit: r1547420 - in /opennlp/addons/liblinear-addon: ./ LiblinearParams.txt pom.xml src/ src/main/ src/main/java/ src/main/java/LiblinearModel.java src/main/java/LiblinearModelSerializer.java src/main/java/LiblinearTrainer.java

Author: joern
Date: Tue Dec  3 15:09:57 2013
New Revision: 1547420

URL: http://svn.apache.org/r1547420
Log:
 OPENNLP-624 Initial check in of the liblinear integration

Added:
    opennlp/addons/liblinear-addon/
    opennlp/addons/liblinear-addon/LiblinearParams.txt
    opennlp/addons/liblinear-addon/pom.xml
    opennlp/addons/liblinear-addon/src/
    opennlp/addons/liblinear-addon/src/main/
    opennlp/addons/liblinear-addon/src/main/java/
    opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java
    opennlp/addons/liblinear-addon/src/main/java/LiblinearModelSerializer.java
    opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java

Added: opennlp/addons/liblinear-addon/LiblinearParams.txt
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/LiblinearParams.txt?rev=1547420&view=auto
==============================================================================
--- opennlp/addons/liblinear-addon/LiblinearParams.txt (added)
+++ opennlp/addons/liblinear-addon/LiblinearParams.txt Tue Dec  3 15:09:57 2013
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Sample machine learning properties file
+
+Algorithm=LiblinearTrainer
+Iterations=100
+Cutoff=0

Added: opennlp/addons/liblinear-addon/pom.xml
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/pom.xml?rev=1547420&view=auto
==============================================================================
--- opennlp/addons/liblinear-addon/pom.xml (added)
+++ opennlp/addons/liblinear-addon/pom.xml Tue Dec  3 15:09:57 2013
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	
+	<parent>
+	    <groupId>org.apache.opennlp</groupId>
+	    <artifactId>opennlp</artifactId>
+	    <version>1.6.0-SNAPSHOT</version>
+	    <relativePath>../opennlp/pom.xml</relativePath>
+    </parent>
+    
+	<artifactId>opennlp-liblinear-addon</artifactId>
+	<packaging>jar</packaging>
+	<name>Apache OpenNLP Liblinear Addon</name>
+
+	<repositories>
+		<repository>
+			<id>ApacheIncubatorRepository</id>
+			<url>
+				http://people.apache.org/repo/m2-incubating-repository/
+			</url>
+		</repository>
+	</repositories>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.opennlp</groupId>
+			<artifactId>opennlp-tools</artifactId>
+			<version>1.6.0-SNAPSHOT</version>
+		</dependency>
+
+		<dependency>
+		    <groupId>de.bwaldvogel</groupId>
+		    <artifactId>liblinear</artifactId>
+		    <version>1.92</version>
+		</dependency>
+
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-dependency-plugin</artifactId>
+				<version>2.1</version>
+				<executions>
+					<execution>
+						<id>copy-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>copy-dependencies</goal>
+						</goals>
+						<configuration>
+							<excludeScope>provided</excludeScope>
+							<stripVersion>true</stripVersion>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<configuration>
+          <skipTests>true</skipTests>
+					<argLine>-Xmx512m</argLine>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+</project>

Added: opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java?rev=1547420&view=auto
==============================================================================
--- opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java (added)
+++ opennlp/addons/liblinear-addon/src/main/java/LiblinearModel.java Tue Dec  3 15:09:57 2013
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.model.ArtifactSerializer;
+import opennlp.tools.util.model.SerializableArtifact;
+import de.bwaldvogel.liblinear.Feature;
+import de.bwaldvogel.liblinear.FeatureNode;
+import de.bwaldvogel.liblinear.Linear;
+import de.bwaldvogel.liblinear.Model;
+
+// TODO: The features need to be serialized with the model.
+// The liblinear model only contains the ints and weights, but the
+// string labels get lost ... basically those are two maps:
+// one for the outcomes, one for the features.
+
+/**
+ * {@link MaxentModel} implementation backed by a liblinear {@link Model}.
+ * Besides the liblinear model it keeps the outcome labels and the map from
+ * predicate name to predicate index, which translate between the strings
+ * used by the callers and the integer indexes used by liblinear.
+ */
+public class LiblinearModel implements MaxentModel, SerializableArtifact {
+
+  private Model model;
+
+  // Lets read them from disk, when model is loaded ...
+  private String outcomeLabels[];
+  private Map<String, Integer> predMap;
+
+  /**
+   * Creates a model from a trained liblinear model and its label mappings.
+   *
+   * @param model the trained liblinear model
+   * @param outcomes the outcome labels, indexed by outcome index
+   * @param predMap maps a predicate name to its zero based predicate index
+   */
+  public LiblinearModel(Model model, String outcomes[], Map<String, Integer> predMap) {
+    this.model = model;
+    this.outcomeLabels = outcomes;
+    this.predMap = predMap;
+  }
+
+  /**
+   * Loads the liblinear part of a model from a stream.
+   * <p>
+   * The outcome labels and the predicate map are not read from the stream
+   * and stay {@code null}, so the evaluation and outcome methods cannot be
+   * used on an instance created this way until both are serialized as well.
+   *
+   * @param in the stream to read the liblinear model from
+   * @throws IOException if reading the model fails
+   */
+  public LiblinearModel(InputStream in) throws IOException {
+    // Fixed charset, so that loading does not depend on the platform default
+    model = Linear.loadModel(new InputStreamReader(in, "UTF-8"));
+  }
+
+  /**
+   * Evaluates a context given as predicate names.
+   *
+   * @param features the predicate names; a name that cannot be mapped to a
+   *     predicate index is ignored
+   * @return one probability per outcome, indexed by outcome index
+   */
+  public double[] eval(String[] features) {
+    List<Integer> context = new ArrayList<Integer>(features.length);
+
+    for (String name : features) {
+      Integer feature = predMap.get(name);
+
+      if (feature != null) {
+        context.add(feature);
+      }
+    }
+
+    return eval(context);
+  }
+
+  /**
+   * Delegates to {@link #eval(String[])}; {@code probs} is neither read nor
+   * written, the result is always returned in a new array.
+   */
+  public double[] eval(String[] context, double[] probs) {
+    return eval(context);
+  }
+
+  /**
+   * Delegates to {@link #eval(String[])}; {@code values} is ignored and
+   * every known predicate is evaluated with the value 1.
+   */
+  public double[] eval(String[] context, float[] values) {
+    return eval(context);
+  }
+
+  /**
+   * Runs the liblinear prediction for the given predicate indexes.
+   *
+   * @param context the zero based predicate indexes of the active features
+   * @return one probability per outcome, indexed by outcome index
+   */
+  private double[] eval(List<Integer> context) {
+    Feature vx[] = new Feature[context.size()];
+
+    for (int i = 0; i < vx.length; i++) {
+      // Shifted by one: predicate indexes start at 0, liblinear's at 1
+      vx[i] = new FeatureNode(context.get(i) + 1, 1d);
+    }
+
+    // liblinear reports the estimates in the order of the model's label
+    // array, which is not guaranteed to be the outcome index order, so
+    // every estimate is stored at the outcome index its label stands for.
+    int labels[] = model.getLabels();
+    double estimates[] = new double[labels.length];
+    Linear.predictProbability(model, vx, estimates);
+
+    double outcomes[] = new double[outcomeLabels.length];
+    for (int i = 0; i < labels.length; i++) {
+      outcomes[labels[i]] = estimates[i];
+    }
+
+    return outcomes;
+  }
+
+  /** Not implemented yet, always returns {@code null}. */
+  public String getAllOutcomes(double[] outcomes) {
+    // TODO: Return prev outcomes ..
+    return null;
+  }
+
+  /**
+   * Returns the label of the highest value in {@code ocs}; on a tie the
+   * entry with the lowest index wins.
+   */
+  public String getBestOutcome(double[] ocs) {
+    int best = 0;
+    for (int i = 1; i < ocs.length; i++) {
+      if (ocs[i] > ocs[best]) {
+        best = i;
+      }
+    }
+    return outcomeLabels[best];
+  }
+
+  // TODO: This method needs to go away from the interface ... !!!
+  /** Always returns {@code null}. */
+  public Object[] getDataStructures() {
+    return null;
+  }
+
+  /**
+   * Looks up the index of an outcome label.
+   *
+   * @return the index of {@code outcome}, or -1 if there is no such label
+   */
+  public int getIndex(String outcome) {
+    for (int i = 0; i < outcomeLabels.length; i++) {
+      if (outcomeLabels[i].equals(outcome)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+  /** Returns the number of outcome labels. */
+  public int getNumOutcomes() {
+    return outcomeLabels.length;
+  }
+
+  /** Returns the outcome label stored at index {@code i}. */
+  public String getOutcome(int i) {
+    return outcomeLabels[i];
+  }
+
+  /** Not implemented yet: returns without writing anything to {@code out}. */
+  public void serialize(OutputStream out) throws IOException {
+
+  }
+
+  /** Returns {@link LiblinearModelSerializer} as the serializer class. */
+  public Class<?> getSerializerClass() {
+    return LiblinearModelSerializer.class;
+  }
+
+}

Added: opennlp/addons/liblinear-addon/src/main/java/LiblinearModelSerializer.java
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/src/main/java/LiblinearModelSerializer.java?rev=1547420&view=auto
==============================================================================
--- opennlp/addons/liblinear-addon/src/main/java/LiblinearModelSerializer.java (added)
+++ opennlp/addons/liblinear-addon/src/main/java/LiblinearModelSerializer.java Tue Dec  3 15:09:57 2013
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+
+/**
+ * {@link ArtifactSerializer} for {@link LiblinearModel} artifacts. Reading and
+ * writing are both delegated to the model class itself.
+ */
+public class LiblinearModelSerializer implements ArtifactSerializer<LiblinearModel> {
+
+  /**
+   * Creates a model by passing the stream to the {@link LiblinearModel}
+   * stream constructor.
+   *
+   * @param in the stream to read the model from
+   * @return the newly created model
+   * @throws IOException if the model constructor fails to read the stream
+   */
+  public LiblinearModel create(InputStream in) throws IOException, InvalidFormatException {
+    return new LiblinearModel(in);
+  }
+
+  /**
+   * Writes a model by calling its own {@code serialize} method.
+   *
+   * @param model the model to write
+   * @param out the stream handed to the model
+   * @throws IOException if the model fails to write itself
+   */
+  public void serialize(LiblinearModel model, OutputStream out) throws IOException {
+    model.serialize(out);
+  }
+}

Added: opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java?rev=1547420&view=auto
==============================================================================
--- opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java (added)
+++ opennlp/addons/liblinear-addon/src/main/java/LiblinearTrainer.java Tue Dec  3 15:09:57 2013
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.bwaldvogel.liblinear.Feature;
+import de.bwaldvogel.liblinear.FeatureNode;
+import de.bwaldvogel.liblinear.Linear;
+import de.bwaldvogel.liblinear.Model;
+import de.bwaldvogel.liblinear.Parameter;
+import de.bwaldvogel.liblinear.Problem;
+import de.bwaldvogel.liblinear.SolverType;
+import de.bwaldvogel.liblinear.Train;
+import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.MaxentModel;
+
+/**
+ * Event trainer that fits a liblinear model to the events of a
+ * {@link DataIndexer} and wraps the result in a {@link LiblinearModel}.
+ */
+public class LiblinearTrainer extends AbstractEventTrainer {
+
+  /**
+   * Passes both maps unchanged to {@link AbstractEventTrainer}.
+   *
+   * @param trainParams the training parameters
+   * @param reportMap the report map expected by the super class
+   */
+  public LiblinearTrainer(Map<String, String> trainParams,
+      Map<String, String> reportMap) {
+    super(trainParams, reportMap);
+
+    // TODO: Extract the solver type here and, depending on it, the
+    // solver specific parameters, e.g. bias, C and eps for L1R_LR.
+  }
+
+  /**
+   * Assembles the liblinear {@link Problem} for the collected events.
+   *
+   * @param vy one target value per event
+   * @param vx one feature vector per event; if {@code bias >= 0} every
+   *     vector must end with a spare slot, which receives the bias feature
+   * @param maxIndex the highest feature index used in {@code vx}
+   * @param bias the value of the bias feature; a negative value means that
+   *     no bias feature is added
+   * @return the populated problem
+   */
+  private static Problem constructProblem(List<Double> vy, List<Feature[]> vx, int maxIndex, double bias) {
+
+    Problem problem = new Problem();
+    problem.l = vy.size();
+    // The bias feature counts as one more feature after the highest index
+    problem.n = bias >= 0 ? maxIndex + 1 : maxIndex;
+    problem.bias = bias;
+    problem.x = new Feature[problem.l][];
+    problem.y = new double[problem.l];
+
+    for (int i = 0; i < problem.l; i++) {
+      problem.x[i] = vx.get(i);
+      problem.y[i] = vy.get(i).doubleValue();
+
+      if (bias >= 0) {
+        // Fill the spare last slot with the bias feature
+        problem.x[i][problem.x[i].length - 1] = new FeatureNode(maxIndex + 1, bias);
+      }
+    }
+
+    return problem;
+  }
+
+  /**
+   * Trains an L1-regularized logistic regression model ({@code L1R_LR}) on
+   * the events of the indexer.
+   *
+   * @param indexer provides the contexts, outcomes, event counts and labels
+   * @return a {@link LiblinearModel} holding the trained model, the outcome
+   *     labels and the map from predicate label to predicate index
+   * @throws IOException declared by the overridden method
+   */
+  @Override
+  public MaxentModel doTrain(DataIndexer indexer) throws IOException {
+
+    List<Double> vy = new ArrayList<Double>();
+    List<Feature[]> vx = new ArrayList<Feature[]>();
+
+    // Fetch the indexer arrays once instead of once per loop iteration
+    int contexts[][] = indexer.getContexts();
+    int outcomes[] = indexer.getOutcomeList();
+    int timesSeen[] = indexer.getNumTimesEventsSeen();
+
+    // NOTE(review): a bias of 0 still adds a bias feature, just with the
+    // value 0; only a negative value switches it off. Confirm the intent.
+    final int bias = 0;
+
+    int maxIndex = 0;
+
+    // For each event ...
+    for (int i = 0; i < contexts.length; i++) {
+
+      vy.add(Double.valueOf(outcomes[i]));
+
+      int features[] = contexts[i];
+
+      // With a bias, one spare slot is reserved for constructProblem
+      Feature[] x = new Feature[bias >= 0 ? features.length + 1 : features.length];
+
+      // for each feature ...
+      for (int fi = 0; fi < features.length; fi++) {
+        // The count array holds one entry per event, so it is read with
+        // the event index i. Reading it with the feature position fi picks
+        // the count of an unrelated event and can run past the array end.
+        x[fi] = new FeatureNode(features[fi] + 1, timesSeen[i]);
+
+        // Track the real maximum instead of relying on sorted contexts
+        maxIndex = Math.max(maxIndex, x[fi].getIndex());
+      }
+
+      vx.add(x);
+    }
+
+    Problem problem = constructProblem(vy, vx, maxIndex, bias);
+    Parameter parameter = new Parameter(SolverType.L1R_LR, 1d, 0.001d);
+
+    Model liblinearModel = Linear.train(problem, parameter);
+
+    // Remember the zero based index every predicate label was trained with
+    Map<String, Integer> predMap = new HashMap<String, Integer>();
+
+    String predLabels[] = indexer.getPredLabels();
+    for (int i = 0; i < predLabels.length; i++) {
+      predMap.put(predLabels[i], i);
+    }
+
+    return new LiblinearModel(liblinearModel, indexer.getOutcomeLabels(), predMap);
+  }
+
+  /**
+   * Always returns {@code true}; presumably this makes the super class sort
+   * and merge the events before training (to be confirmed against
+   * {@link AbstractEventTrainer}).
+   */
+  @Override
+  public boolean isSortAndMerge() {
+    return true;
+  }
+
+  /**
+   * Manual smoke test: writes a tiny data set to a temporary file, trains
+   * a model on it and prints the prediction for one instance.
+   */
+  public static void main(String[] args) throws Exception {
+
+    File file = File.createTempFile("svm", "test");
+    file.deleteOnExit();
+
+    Collection<String> lines = new ArrayList<String>();
+    lines.add("1 1:1 3:1 4:1 6:1");
+    lines.add("2 2:1 3:1 5:1 7:1");
+    lines.add("1 3:1 5:1");
+    lines.add("1 1:1 4:1 7:1");
+    lines.add("2 4:1 5:1 7:1");
+    lines.add("1 1:1 4:1 7:1");
+    lines.add("2 4:1 5:1 7:1");
+
+    BufferedWriter writer = new BufferedWriter(new FileWriter(file));
+    try {
+      for (String line : lines) {
+        writer.append(line).append("\n");
+      }
+    } finally {
+      writer.close();
+    }
+
+    Train train = new Train();
+
+    Problem problem = train.readProblem(file, 0d);
+
+    Model model = Linear.train(problem, new Parameter(SolverType.L1R_LR, 10d,
+        0.02d));
+
+    Feature[] instance = new Feature[]{new FeatureNode(4, 1d), new FeatureNode(1, 1d)};
+
+    double result = Linear.predict(model, instance);
+
+    // The estimates are computed as part of the smoke test but not printed
+    double outcomes[] = new double[2];
+    Linear.predictProbability(model, instance, outcomes);
+
+    System.out.println(result);
+  }
+}