You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/20 10:40:59 UTC

[12/50] [abbrv] opennlp git commit: OPENNLP-978: Set name finder defaults to perceptron and cutoff zero

OPENNLP-978: Set name finder defaults to perceptron and cutoff zero


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/2079931f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/2079931f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/2079931f

Branch: refs/heads/parser_regression
Commit: 2079931f074f5907bf7fd523ef757a9972bb9a74
Parents: a1bb54b
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Feb 7 23:58:43 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:19 2017 +0200

----------------------------------------------------------------------
 .../namefind/TokenNameFinderCrossValidatorTool.java    |  4 ++--
 .../cmdline/namefind/TokenNameFinderTrainerTool.java   |  3 ++-
 .../main/java/opennlp/tools/namefind/NameFinderME.java |  6 ++++++
 .../java/opennlp/tools/util/TrainingParameters.java    | 13 +++++++++++++
 .../java/opennlp/tools/namefind/NameFinderMETest.java  |  1 +
 5 files changed, 24 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 333abd9..153d6f7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -42,8 +42,8 @@ import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
 import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.EvaluationMonitor;
-import opennlp.tools.util.model.ModelUtil;
 
 public final class TokenNameFinderCrossValidatorTool
     extends AbstractCrossValidatorTool<NameSample, CVToolParams> {
@@ -65,7 +65,7 @@ public final class TokenNameFinderCrossValidatorTool
 
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
     if (mlParams == null) {
-      mlParams = ModelUtil.createDefaultTrainingParameters();
+      mlParams = new TrainingParameters();
     }
 
     byte featureGeneratorBytes[] =

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index a8d4417..fb73506 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -40,6 +40,7 @@ import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.featuregen.GeneratorFactory;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.ModelUtil;
@@ -166,7 +167,7 @@ public final class TokenNameFinderTrainerTool
 
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
     if (mlParams == null) {
-      mlParams = ModelUtil.createDefaultTrainingParameters();
+      mlParams = new TrainingParameters();
     }
 
     File modelOutFile = params.getModel();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 6ce0b83..5a16f34 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -37,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.ml.perceptron.PerceptronTrainer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
 import opennlp.tools.util.SequenceCodec;
@@ -219,6 +220,11 @@ public class NameFinderME implements TokenNameFinder {
   public static TokenNameFinderModel train(String languageCode, String type,
           ObjectStream<NameSample> samples, TrainingParameters trainParams,
           TokenNameFinderFactory factory) throws IOException {
+
+    trainParams.putIfAbsent(TrainingParameters.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
+    trainParams.putIfAbsent(TrainingParameters.CUTOFF_PARAM, "0");
+    trainParams.putIfAbsent(TrainingParameters.ITERATIONS_PARAM, "300");
+
     String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
 
     int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
index 188446c..3f21623 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
@@ -130,6 +130,19 @@ public class TrainingParameters {
     return params;
   }
 
+  public void putIfAbsent(String namespace, String key, String value) {
+    if (namespace == null) {
+      parameters.putIfAbsent(key, value);
+    }
+    else {
+      parameters.putIfAbsent(namespace + "." + key, value);
+    }
+  }
+
+  public void putIfAbsent(String key, String value) {
+    putIfAbsent(null, key, value);
+  }
+
   public void put(String namespace, String key, String value) {
 
     if (namespace == null) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index eded5c5..494af62 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -285,6 +285,7 @@ public class NameFinderMETest {
         new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
 
     TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
     params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
     params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));