You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by vk...@apache.org on 2014/07/21 06:17:58 UTC
svn commit: r1612184 - in /opennlp/trunk/opennlp-tools: lang/ml/
src/main/java/opennlp/tools/ml/maxent/quasinewton/
src/test/java/opennlp/tools/ml/maxent/quasinewton/
Author: vkhuc
Date: Mon Jul 21 04:17:58 2014
New Revision: 1612184
URL: http://svn.apache.org/r1612184
Log:
OPENNLP-703 Added a parallel version of NegLogLikelihood. Added a test case for it. Updated the parameter list of MaxentQN.
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java (with props)
Modified:
opennlp/trunk/opennlp-tools/lang/ml/MaxentQNTrainerParams.txt
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
Modified: opennlp/trunk/opennlp-tools/lang/ml/MaxentQNTrainerParams.txt
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/lang/ml/MaxentQNTrainerParams.txt?rev=1612184&r1=1612183&r2=1612184&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/lang/ml/MaxentQNTrainerParams.txt (original)
+++ opennlp/trunk/opennlp-tools/lang/ml/MaxentQNTrainerParams.txt Mon Jul 21 04:17:58 2014
@@ -1,37 +1,40 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Sample machine learning properties file
-
-Algorithm=MAXENT_QN
-Iterations=100
-Cutoff=0
-
-# Costs for L1- and L2-regularization. These parameters must be larger or
-# equal to zero. The higher they are, the more penalty will be imposed to
-# avoid overfitting. The parameters can be set as follows:
-# if L1Cost = 0 and L2Cost = 0, no regularization will be used,
-# if L1Cost > 0 and L2Cost = 0, L1 will be used,
-# if L1Cost = 0 and L2Cost > 0, L2 will be used,
-# if both paramters are set to be larger than 0, Elastic Net
-# (i.e. L1 and L2 combined) will be used.
-L1Cost=0.1
-L2Cost=0.1
-
-# Number of Hessian updates to store
-NumOfUpdates=15
-
-# Maximum number of objective function's evaluations
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Sample machine learning properties file
+
+Algorithm=MAXENT_QN
+Iterations=100
+Cutoff=0
+
+# Number of threads
+Threads=1
+
+# Costs for L1- and L2-regularization. These parameters must be larger than or
+# equal to zero. The higher they are, the more penalty will be imposed to
+# avoid overfitting. The parameters can be set as follows:
+# if L1Cost = 0 and L2Cost = 0, no regularization will be used,
+# if L1Cost > 0 and L2Cost = 0, L1 will be used,
+# if L1Cost = 0 and L2Cost > 0, L2 will be used,
+# if both parameters are set to be larger than 0, Elastic Net
+# (i.e. L1 and L2 combined) will be used.
+L1Cost=0.1
+L2Cost=0.1
+
+# Number of Hessian updates to store
+NumOfUpdates=15
+
+# Maximum number of objective function's evaluations
MaxFctEval=30000
\ No newline at end of file
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java?rev=1612184&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java Mon Jul 21 04:17:58 2014
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.ml.maxent.quasinewton;
+
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import opennlp.tools.ml.model.DataIndexer;
+
+/**
+ * Evaluate negative log-likelihood and its gradient in parallel
+ */
+public class ParallelNegLogLikelihood extends NegLogLikelihood {
+
+ // Number of threads
+ int threads;
+
+ // Partial value of negative log-likelihood to be computed by each thread
+ private double[] negLogLikelihoodThread;
+
+ // Partial gradient
+ private double[][] gradientThread;
+
+ /**
+ * Creates the function and allocates one partial negative log-likelihood
+ * slot and one partial gradient vector per thread.
+ *
+ * @param indexer data indexer passed on to the {@link NegLogLikelihood} constructor
+ * @param threads number of threads to split the computation across; must be 1 or larger
+ * @throws IllegalArgumentException if {@code threads} is zero or negative
+ */
+ public ParallelNegLogLikelihood(DataIndexer indexer, int threads) {
+ super(indexer);
+
+ if (threads <= 0)
+ throw new IllegalArgumentException(
+ "Number of threads must be 1 or larger");
+
+ this.threads = threads;
+ this.negLogLikelihoodThread = new double[threads];
+ this.gradientThread = new double[threads][dimension];
+ }
+
+ /**
+ * Negative log-likelihood
+ */
+ @Override
+ public double valueAt(double[] x) {
+
+ if (x.length != dimension)
+ throw new IllegalArgumentException(
+ "x is invalid, its dimension is not equal to domain dimension.");
+
+ // Compute partial value of negative log-likelihood in each thread
+ computeInParallel(x, NegLLComputeTask.class);
+
+ double negLogLikelihood = 0;
+ for (int t = 0; t < threads; t++) {
+ negLogLikelihood += negLogLikelihoodThread[t];
+ }
+
+ return negLogLikelihood;
+ }
+
+ /**
+ * Compute gradient
+ */
+ @Override
+ public double[] gradientAt(double[] x) {
+
+ if (x.length != dimension)
+ throw new IllegalArgumentException(
+ "x is invalid, its dimension is not equal to the function.");
+
+ // Compute partial gradient in each thread
+ computeInParallel(x, GradientComputeTask.class);
+
+ // Accumulate gradient
+ for (int i = 0; i < dimension; i++) {
+ gradient[i] = 0;
+ for (int t = 0; t < threads; t++) {
+ gradient[i] += gradientThread[t][i];
+ }
+ }
+
+ return gradient;
+ }
+
+ /**
+ * Splits the contexts into {@code threads} contiguous slices, runs one task
+ * of the given type per slice and blocks until every task has finished.
+ * Each slice holds {@code numContexts / threads} contexts; the last slice
+ * additionally takes the {@code numContexts % threads} leftover contexts.
+ *
+ * @param x point at which the tasks evaluate the function
+ * @param taskClass concrete task type that is instantiated reflectively for each slice
+ * @throws IllegalStateException if a task cannot be created, fails, or the
+ * wait is interrupted; the per-thread results are then incomplete and
+ * must not be accumulated
+ */
+ private void computeInParallel(double[] x, Class<? extends ComputeTask> taskClass) {
+ ExecutorService executor = Executors.newFixedThreadPool(threads);
+ int taskSize = numContexts / threads;
+ int leftOver = numContexts % threads;
+
+ try {
+ // The tasks are inner classes, so their constructors take the enclosing
+ // instance as an implicit first argument
+ Constructor<? extends ComputeTask> cons = taskClass.getConstructor(
+ ParallelNegLogLikelihood.class,
+ int.class, int.class, int.class, double[].class);
+
+ List<Future<?>> futures = new ArrayList<Future<?>>();
+ for (int i = 0; i < threads; i++) {
+ // The last task also processes the remainder of the division
+ int length = (i != threads - 1) ? taskSize : taskSize + leftOver;
+ futures.add(executor.submit(
+ cons.newInstance(this, i, i*taskSize, length, x)));
+ }
+
+ for (Future<?> future: futures)
+ future.get();
+
+ } catch (Exception e) {
+ if (e instanceof InterruptedException) {
+ // Preserve the interrupt status for callers further up the stack
+ Thread.currentThread().interrupt();
+ }
+ // Do not swallow the failure: the callers would otherwise sum stale or
+ // partial per-thread results and return a wrong value or gradient
+ throw new IllegalStateException(
+ "Parallel computation of " + taskClass.getSimpleName() + " failed", e);
+ } finally {
+ // Always release the worker threads, even when a task failed
+ executor.shutdown();
+ }
+ }
+
+ /**
+ * Task that is computed in parallel
+ */
+ abstract class ComputeTask implements Callable<ComputeTask> {
+
+ final int threadIndex;
+
+ // Start index of contexts to compute
+ final int startIndex;
+
+ // Number of contexts to compute
+ final int length;
+
+ final double[] x;
+
+ public ComputeTask(int threadIndex, int startIndex, int length, double[] x) {
+ this.threadIndex = threadIndex;
+ this.startIndex = startIndex;
+ this.length = length;
+ this.x = x;
+ }
+ }
+
+ /**
+ * Task for computing partial value of negative log-likelihood
+ */
+ class NegLLComputeTask extends ComputeTask {
+
+ final double[] tempSums;
+
+ public NegLLComputeTask(int threadIndex, int startIndex, int length, double[] x) {
+ super(threadIndex, startIndex, length, x);
+ this.tempSums = new double[numOutcomes];
+ }
+
+ @Override
+ public NegLLComputeTask call() {
+ int ci, oi, ai, vectorIndex, outcome;
+ double predValue, logSumOfExps;
+ negLogLikelihoodThread[threadIndex] = 0;
+
+ for (ci = startIndex; ci < startIndex + length; ci++) {
+ for (oi = 0; oi < numOutcomes; oi++) {
+ tempSums[oi] = 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ tempSums[oi] += predValue * x[vectorIndex];
+ }
+ }
+
+ logSumOfExps = ArrayMath.logSumOfExps(tempSums);
+
+ outcome = outcomeList[ci];
+ negLogLikelihoodThread[threadIndex] -=
+ (tempSums[outcome] - logSumOfExps) * numTimesEventsSeen[ci];
+ }
+
+ return this;
+ }
+ }
+
+ /**
+ * Task for computing partial gradient
+ */
+ class GradientComputeTask extends ComputeTask {
+
+ final double[] expectation;
+
+ public GradientComputeTask(int threadIndex, int startIndex, int length, double[] x) {
+ super(threadIndex, startIndex, length, x);
+ this.expectation = new double[numOutcomes];
+ }
+
+ @Override
+ public GradientComputeTask call() {
+ int ci, oi, ai, vectorIndex;
+ double predValue, logSumOfExps;
+ int empirical;
+
+ // Reset gradientThread
+ Arrays.fill(gradientThread[threadIndex], 0);
+
+ for (ci = startIndex; ci < startIndex + length; ci++) {
+ for (oi = 0; oi < numOutcomes; oi++) {
+ expectation[oi] = 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ expectation[oi] += predValue * x[vectorIndex];
+ }
+ }
+
+ logSumOfExps = ArrayMath.logSumOfExps(expectation);
+
+ for (oi = 0; oi < numOutcomes; oi++) {
+ expectation[oi] = Math.exp(expectation[oi] - logSumOfExps);
+ }
+
+ for (oi = 0; oi < numOutcomes; oi++) {
+ empirical = outcomeList[ci] == oi? 1 : 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ gradientThread[threadIndex][vectorIndex] +=
+ predValue * (expectation[oi] - empirical) * numTimesEventsSeen[ci];
+ }
+ }
+ }
+
+ return this;
+ }
+ }
+}
\ No newline at end of file
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java?rev=1612184&r1=1612183&r2=1612184&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java Mon Jul 21 04:17:58 2014
@@ -170,7 +170,7 @@ public class QNMinimizer {
}
/**
- * Find the parameters that minimizes the objective function
+ * Find the parameters that minimize the objective function
* @param function objective function
* @return minimizing parameters
*/
@@ -211,7 +211,7 @@ public class QNMinimizer {
display("\nSolving convex optimization problem.");
display("\nObjective function has " + dimension + " variable(s).");
display("\n\nPerforming " + iterations + " iterations with " +
- "L1-cost = " + l1Cost + " and L2-cost = " + l2Cost + ".\n");
+ "L1Cost=" + l1Cost + " and L2Cost=" + l2Cost + "\n");
}
double[] direction = new double[dimension];
@@ -260,10 +260,12 @@ public class QNMinimizer {
display(iter + ": ");
if (evaluator != null) {
- display("\t " + lsr.getValueAtCurr() + "\t" + lsr.getFuncChangeRate()
+ display("\t" + lsr.getValueAtNext()
+ + "\t" + lsr.getFuncChangeRate()
+ "\t" + evaluator.evaluate(lsr.getNextPoint()) + "\n");
} else {
- display("\t " + lsr.getValueAtCurr() + "\t" + lsr.getFuncChangeRate() + "\n");
+ display("\t " + lsr.getValueAtNext() +
+ "\t" + lsr.getFuncChangeRate() + "\n");
}
}
if (isConverged(lsr))
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java?rev=1612184&r1=1612183&r2=1612184&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java Mon Jul 21 04:17:58 2014
@@ -1,170 +1,169 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package opennlp.tools.ml.maxent.quasinewton;
-
-import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.ml.model.Context;
-
-public class QNModel extends AbstractModel {
-
- public QNModel(Context[] params, String[] predLabels, String[] outcomeNames) {
- super(params, predLabels, outcomeNames);
- this.modelType = ModelType.MaxentQn;
- }
-
- public int getNumOutcomes() {
- return this.outcomeNames.length;
- }
-
- private int getPredIndex(String predicate) {
- return pmap.get(predicate);
- }
-
- public double[] eval(String[] context) {
- return eval(context, new double[evalParams.getNumOutcomes()]);
- }
-
- public double[] eval(String[] context, double[] probs) {
- return eval(context, null, probs);
- }
-
- public double[] eval(String[] context, float[] values) {
- return eval(context, values, new double[evalParams.getNumOutcomes()]);
- }
-
- /**
- * Model evaluation which should be used during inference.
- * @param context
- * The predicates which have been observed at the present
- * decision point.
- * @param values
- * Weights of the predicates which have been observed at
- * the present decision point.
- * @param probs
- * Probability for outcomes.
- * @return Normalized probabilities for the outcomes given the context.
- */
- private double[] eval(String[] context, float[] values, double[] probs) {
- Context[] params = evalParams.getParams();
-
- for (int ci = 0; ci < context.length; ci++) {
- int predIdx = getPredIndex(context[ci]);
-
- if (predIdx >= 0) {
- double predValue = 1.0;
- if (values != null) predValue = values[ci];
-
- double[] parameters = params[predIdx].getParameters();
- int[] outcomes = params[predIdx].getOutcomes();
- for (int i = 0; i < outcomes.length; i++) {
- int oi = outcomes[i];
- probs[oi] += predValue * parameters[i];
- }
- }
- }
-
- double logSumExp = ArrayMath.logSumOfExps(probs);
- for (int oi = 0; oi < outcomeNames.length; oi++) {
- probs[oi] = Math.exp(probs[oi] - logSumExp);
- }
- return probs;
- }
-
- /**
- * Model evaluation which should be used during training to report model accuracy.
- * @param context
- * Indices of the predicates which have been observed at the present
- * decision point.
- * @param values
- * Weights of the predicates which have been observed at
- * the present decision point.
- * @param probs
- * Probability for outcomes
- * @param nOutcomes
- * Number of outcomes
- * @param nPredLabels
- * Number of unique predicates
- * @param parameters
- * Model parameters
- * @return Normalized probabilities for the outcomes given the context.
- */
- public static double[] eval(int[] context, float[] values, double[] probs,
- int nOutcomes, int nPredLabels, double[] parameters) {
-
- for (int i = 0; i < context.length; i++) {
- int predIdx = context[i];
- double predValue = 1.0;
- if (values != null) predValue = values[i];
-
- for (int oi = 0; oi < nOutcomes; oi++) {
- probs[oi] += predValue * parameters[oi * nPredLabels + predIdx];
- }
- }
-
- double logSumExp = ArrayMath.logSumOfExps(probs);
- for (int oi = 0; oi < nOutcomes; oi++) {
- probs[oi] = Math.exp(probs[oi] - logSumExp);
- }
-
- return probs;
- }
-
- public boolean equals(Object obj) {
- if (!(obj instanceof QNModel))
- return false;
-
- QNModel objModel = (QNModel) obj;
- if (this.outcomeNames.length != objModel.outcomeNames.length)
- return false;
- for (int i = 0; i < this.outcomeNames.length; i++) {
- if (!this.outcomeNames[i].equals(objModel.outcomeNames[i]))
- return false;
- }
-
- if (this.pmap.size() != objModel.pmap.size())
- return false;
- String[] pmapArray = new String[pmap.size()];
- pmap.toArray(pmapArray);
- for (int i = 0; i < this.pmap.size(); i++) {
- if (i != objModel.pmap.get(pmapArray[i]))
- return false;
- }
-
- // compare evalParameters
- Context[] contextComparing = objModel.evalParams.getParams();
- if (this.evalParams.getParams().length != contextComparing.length)
- return false;
- for (int i = 0; i < this.evalParams.getParams().length; i++) {
- if (this.evalParams.getParams()[i].getOutcomes().length != contextComparing[i].getOutcomes().length)
- return false;
- for (int j = 0; i < this.evalParams.getParams()[i].getOutcomes().length; i++) {
- if (this.evalParams.getParams()[i].getOutcomes()[j] != contextComparing[i].getOutcomes()[j])
- return false;
- }
-
- if (this.evalParams.getParams()[i].getParameters().length != contextComparing[i].getParameters().length)
- return false;
- for (int j = 0; i < this.evalParams.getParams()[i].getParameters().length; i++) {
- if (this.evalParams.getParams()[i].getParameters()[j] != contextComparing[i].getParameters()[j])
- return false;
- }
- }
- return true;
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.ml.maxent.quasinewton;
+
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.Context;
+
+public class QNModel extends AbstractModel {
+
+ public QNModel(Context[] params, String[] predLabels, String[] outcomeNames) {
+ super(params, predLabels, outcomeNames);
+ this.modelType = ModelType.MaxentQn;
+ }
+
+ public int getNumOutcomes() {
+ return this.outcomeNames.length;
+ }
+
+ private int getPredIndex(String predicate) {
+ return pmap.get(predicate);
+ }
+
+ public double[] eval(String[] context) {
+ return eval(context, new double[evalParams.getNumOutcomes()]);
+ }
+
+ public double[] eval(String[] context, double[] probs) {
+ return eval(context, null, probs);
+ }
+
+ public double[] eval(String[] context, float[] values) {
+ return eval(context, values, new double[evalParams.getNumOutcomes()]);
+ }
+
+ /**
+ * Model evaluation which should be used during inference.
+ * @param context
+ * The predicates which have been observed at the present
+ * decision point.
+ * @param values
+ * Weights of the predicates which have been observed at
+ * the present decision point.
+ * @param probs
+ * Probability for outcomes.
+ * @return Normalized probabilities for the outcomes given the context.
+ */
+ private double[] eval(String[] context, float[] values, double[] probs) {
+ Context[] params = evalParams.getParams();
+
+ for (int ci = 0; ci < context.length; ci++) {
+ int predIdx = getPredIndex(context[ci]);
+
+ if (predIdx >= 0) {
+ double predValue = 1.0;
+ if (values != null) predValue = values[ci];
+
+ double[] parameters = params[predIdx].getParameters();
+ int[] outcomes = params[predIdx].getOutcomes();
+ for (int i = 0; i < outcomes.length; i++) {
+ int oi = outcomes[i];
+ probs[oi] += predValue * parameters[i];
+ }
+ }
+ }
+
+ double logSumExp = ArrayMath.logSumOfExps(probs);
+ for (int oi = 0; oi < outcomeNames.length; oi++) {
+ probs[oi] = Math.exp(probs[oi] - logSumExp);
+ }
+ return probs;
+ }
+
+ /**
+ * Model evaluation which should be used during training to report model accuracy.
+ * @param context
+ * Indices of the predicates which have been observed at the present
+ * decision point.
+ * @param values
+ * Weights of the predicates which have been observed at
+ * the present decision point.
+ * @param probs
+ * Probability for outcomes
+ * @param nOutcomes
+ * Number of outcomes
+ * @param nPredLabels
+ * Number of unique predicates
+ * @param parameters
+ * Model parameters
+ * @return Normalized probabilities for the outcomes given the context.
+ */
+ public static double[] eval(int[] context, float[] values, double[] probs,
+ int nOutcomes, int nPredLabels, double[] parameters) {
+
+ for (int i = 0; i < context.length; i++) {
+ int predIdx = context[i];
+ double predValue = values != null? values[i] : 1.0;
+ for (int oi = 0; oi < nOutcomes; oi++) {
+ probs[oi] += predValue * parameters[oi * nPredLabels + predIdx];
+ }
+ }
+
+ double logSumExp = ArrayMath.logSumOfExps(probs);
+
+ for (int oi = 0; oi < nOutcomes; oi++) {
+ probs[oi] = Math.exp(probs[oi] - logSumExp);
+ }
+
+ return probs;
+ }
+
+ /**
+ * Compares this model with another {@code QNModel} by outcome names,
+ * predicate-to-index mapping, and the outcomes and parameters of every
+ * predicate context.
+ *
+ * NOTE(review): this class does not override hashCode() alongside equals();
+ * confirm whether a superclass provides a consistent one.
+ *
+ * @param obj object to compare with
+ * @return {@code true} if {@code obj} is a {@code QNModel} whose outcome names,
+ * predicate map and evaluation parameters all match this model's
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof QNModel))
+ return false;
+
+ QNModel objModel = (QNModel) obj;
+ if (this.outcomeNames.length != objModel.outcomeNames.length)
+ return false;
+ for (int i = 0; i < this.outcomeNames.length; i++) {
+ if (!this.outcomeNames[i].equals(objModel.outcomeNames[i]))
+ return false;
+ }
+
+ if (this.pmap.size() != objModel.pmap.size())
+ return false;
+ String[] pmapArray = new String[pmap.size()];
+ pmap.toArray(pmapArray);
+ // Every predicate must map to the same index in both models
+ for (int i = 0; i < this.pmap.size(); i++) {
+ if (i != objModel.pmap.get(pmapArray[i]))
+ return false;
+ }
+
+ // compare evalParameters
+ Context[] contextThis = this.evalParams.getParams();
+ Context[] contextComparing = objModel.evalParams.getParams();
+ if (contextThis.length != contextComparing.length)
+ return false;
+ for (int i = 0; i < contextThis.length; i++) {
+ int[] outcomesThis = contextThis[i].getOutcomes();
+ int[] outcomesComparing = contextComparing[i].getOutcomes();
+ if (outcomesThis.length != outcomesComparing.length)
+ return false;
+ // The inner loops must advance their own index j; testing and
+ // incrementing i here would compare only element 0 and corrupt the
+ // outer iteration
+ for (int j = 0; j < outcomesThis.length; j++) {
+ if (outcomesThis[j] != outcomesComparing[j])
+ return false;
+ }
+
+ double[] parametersThis = contextThis[i].getParameters();
+ double[] parametersComparing = contextComparing[i].getParameters();
+ if (parametersThis.length != parametersComparing.length)
+ return false;
+ for (int j = 0; j < parametersThis.length; j++) {
+ if (parametersThis[j] != parametersComparing[j])
+ return false;
+ }
+ }
+ return true;
+ }
}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java?rev=1612184&r1=1612183&r2=1612184&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java Mon Jul 21 04:17:58 2014
@@ -35,20 +35,26 @@ public class QNTrainer extends AbstractE
public static final String MAXENT_QN_VALUE = "MAXENT_QN";
- public static final String L1COST_PARAM = "L1Cost";
+ public static final String THREADS_PARAM = "Threads";
+ public static final int THREADS_DEFAULT = 1;
+
+ public static final String L1COST_PARAM = "L1Cost";
public static final double L1COST_DEFAULT = 0.1;
- public static final String L2COST_PARAM = "L2Cost";
+ public static final String L2COST_PARAM = "L2Cost";
public static final double L2COST_DEFAULT = 0.1;
// Number of Hessian updates to store
public static final String M_PARAM = "NumOfUpdates";
- public static final int M_DEFAULT = 15;
+ public static final int M_DEFAULT = 15;
// Maximum number of function evaluations
public static final String MAX_FCT_EVAL_PARAM = "MaxFctEval";
- public static final int MAX_FCT_EVAL_DEFAULT = 30000;
+ public static final int MAX_FCT_EVAL_DEFAULT = 30000;
+ // Number of threads
+ private int threads;
+
// L1-regularization cost
private double l1Cost;
@@ -80,6 +86,7 @@ public class QNTrainer extends AbstractE
this.verbose = verbose;
this.m = m < 0? M_DEFAULT: m;
this.maxFctEval = maxFctEval < 0? MAX_FCT_EVAL_DEFAULT: maxFctEval;
+ this.threads = THREADS_DEFAULT;
this.l1Cost = L1COST_DEFAULT;
this.l2Cost = L2COST_DEFAULT;
}
@@ -113,6 +120,13 @@ public class QNTrainer extends AbstractE
}
this.maxFctEval = maxFctEval;
+ // Number of threads must be >= 1
+ int threads = getIntParam(THREADS_PARAM, THREADS_DEFAULT);
+ if (threads < 1) {
+ return false;
+ }
+ this.threads = threads;
+
// Regularization costs must be >= 0
double l1Cost = getDoubleParam(L1COST_PARAM, L1COST_DEFAULT);
if (l1Cost < 0) {
@@ -139,11 +153,17 @@ public class QNTrainer extends AbstractE
}
// << Members related to AbstractEventTrainer
-
public QNModel trainModel(int iterations, DataIndexer indexer) {
// Train model's parameters
- Function objectiveFunction = new NegLogLikelihood(indexer);
+ Function objectiveFunction = null;
+ if (threads == 1) {
+ System.out.println("Computing model parameters ...");
+ objectiveFunction = new NegLogLikelihood(indexer);
+ } else {
+ System.out.println("Computing model parameters in " + threads + " threads ...");
+ objectiveFunction = new ParallelNegLogLikelihood(indexer, threads);
+ }
QNMinimizer minimizer = new QNMinimizer(
l1Cost, l2Cost, iterations, m, maxFctEval, verbose);
Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java?rev=1612184&r1=1612183&r2=1612184&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java Mon Jul 21 04:17:58 2014
@@ -106,5 +106,18 @@ public class QNPrepAttachTest {
testModel(model, 0.8227283981183461);
}
+
+ @Test
+ public void testQNOnPrepAttachDataInParallel() throws IOException {
+
+ Map<String, String> trainParams = new HashMap<String, String>();
+ trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
+ trainParams.put("Threads", Integer.toString(2));
+
+ MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
+ .train(createTrainingStream());
+
+ testModel(model, 0.8115870264917059);
+ }
}