You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2017/03/13 16:06:30 UTC
[22/24] opennlp git commit: Revert "OPENNLP-1002 Remove deprecated
GIS class"
Revert "OPENNLP-1002 Remove deprecated GIS class"
This reverts commit efa257676280abd316bb677e5a8de5cb9fe1dd73.
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a9cfd7ee
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a9cfd7ee
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a9cfd7ee
Branch: refs/heads/OPENNLP-778
Commit: a9cfd7ee6e576dfb8289969950199d33177dc7ce
Parents: efa2576
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Mar 10 17:22:28 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Mar 10 17:22:28 2017 +0100
----------------------------------------------------------------------
.../main/java/opennlp/tools/ml/maxent/GIS.java | 303 +++++++++++++++++++
1 file changed, 303 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/a9cfd7ee/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
new file mode 100644
index 0000000..97c214d
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.maxent;
+
+import java.io.IOException;
+
+import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.Prior;
+import opennlp.tools.ml.model.UniformPrior;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * A Factory class which uses instances of GISTrainer to create and train
+ * GISModels.
+ * @deprecated use {@link GISTrainer}
+ */
+@Deprecated
+public class GIS extends AbstractEventTrainer {
+
+ /** Algorithm name that {@code isValid()} accepts when an algorithm is configured. */
+ public static final String MAXENT_VALUE = "MAXENT";
+
+ /**
+ * Set this to false to suppress progress messages from the static trainModel()
+ * overloads that read this flag instead of taking an explicit print argument.
+ */
+ public static boolean PRINT_MESSAGES = true;
+
+ /**
+ * Passed to the trainer as its smoothing observation: the "number" of times the
+ * trainer imagines it saw a feature it never actually saw. Defaulted to 0.1.
+ */
+ private static final double SMOOTHING_OBSERVATION = 0.1;
+
+ // Key and fallback value doTrain() uses to read the smoothing switch from the parameters.
+ private static final String SMOOTHING_PARAM = "smoothing";
+ private static final boolean SMOOTHING_DEFAULT = false;
+
+ public GIS() { // no arguments: the superclass no-arg constructor runs implicitly
+ }
+
+ public GIS(TrainingParameters parameters) {
+ super(parameters); // the parameters are handed to the superclass constructor
+ }
+
+ /** Accepts the setup when super.isValid() holds and the algorithm is unset or MAXENT_VALUE. */
+ @Override
+ public boolean isValid() {
+   if (!super.isValid()) {
+     return false;
+   }
+   // No configured algorithm is accepted; a configured one must equal MAXENT_VALUE.
+   String algorithmName = getAlgorithm();
+   return algorithmName == null || MAXENT_VALUE.equals(algorithmName);
+ }
+
+ public boolean isSortAndMerge() { // constant; presumably read by the superclass - TODO confirm
+ return true;
+ }
+
+ /**
+ * Trains on already-indexed events, taking the iteration count from getIterations() and
+ * verbosity, smoothing and the thread count (default 1) from the training parameters.
+ * @param indexer the indexed events handed to the static six-argument trainModel.
+ * @return the model trained with a null prior, which that overload turns into a UniformPrior.
+ */
+ public AbstractModel doTrain(DataIndexer indexer) throws IOException {
+   final int rounds = getIterations();
+   final boolean verbose = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
+   final boolean smooth = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
+   final int threadCount = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
+   return trainModel(rounds, indexer, verbose, smooth, null, threadCount);
+ }
+
+ // << members related to AbstractEventTrainer
+
+ /**
+ * Trains a GIS model with 100 iterations, a cutoff of 0, no smoothing and
+ * progress output governed by {@link #PRINT_MESSAGES}.
+ *
+ * @param eventStream the events on which the model is trained.
+ * @return the model returned by the five-argument stream overload.
+ * @throws IOException if the delegate overload reports one.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
+   final int defaultIterations = 100;
+   final int noCutoff = 0;
+   return trainModel(eventStream, defaultIterations, noCutoff, false, PRINT_MESSAGES);
+ }
+
+ /**
+ * Trains a GIS model with 100 iterations and a cutoff of 0; progress output
+ * is governed by {@link #PRINT_MESSAGES}.
+ *
+ * @param eventStream the events on which the model is trained.
+ * @param smoothing whether the created trainer uses smoothing.
+ * @return the newly trained model, as returned by the five-argument stream
+ * overload.
+ * @throws IOException if the delegate overload reports one.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
+     throws IOException {
+   final int defaultIterations = 100;
+   final int noCutoff = 0;
+   final boolean verbose = PRINT_MESSAGES;
+   return trainModel(eventStream, defaultIterations, noCutoff, smoothing, verbose);
+ }
+
+ /**
+ * Trains a GIS model without smoothing; progress output is governed by
+ * {@link #PRINT_MESSAGES}.
+ *
+ * @param eventStream the events on which the model is trained.
+ * @param iterations the number of GIS iterations to perform.
+ * @param cutoff the number of times a feature must be seen in order to be
+ * relevant for training.
+ * @return the newly trained model, as returned by the five-argument stream
+ * overload.
+ * @throws IOException if the delegate overload reports one.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+     int cutoff) throws IOException {
+   final boolean noSmoothing = false;
+   final boolean verbose = PRINT_MESSAGES;
+   return trainModel(eventStream, iterations, cutoff, noSmoothing, verbose);
+ }
+
+ /**
+ * Trains a GIS model with a freshly created {@link GISTrainer}.
+ *
+ * <p>The trainer receives the given smoothing switch and the class-wide
+ * smoothing observation before training starts.
+ *
+ * @param eventStream the events on which the model is trained.
+ * @param iterations the number of GIS iterations to perform.
+ * @param cutoff the number of times a feature must be seen in order to be
+ * relevant for training.
+ * @param smoothing whether the created trainer uses smoothing.
+ * @param printMessagesWhileTraining handed to the {@link GISTrainer}
+ * constructor; determines whether training status messages are printed.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ * @throws IOException if the trainer reports one while training.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+     int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
+     throws IOException {
+   final GISTrainer gisTrainer = new GISTrainer(printMessagesWhileTraining);
+   // The smoothing observation is set unconditionally, whatever the smoothing switch says.
+   gisTrainer.setSmoothing(smoothing);
+   gisTrainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+
+   return gisTrainer.trainModel(eventStream, iterations, cutoff);
+ }
+
+ /**
+ * Trains a GIS model with an optional gaussian smoother; progress output is
+ * governed by {@link #PRINT_MESSAGES}.
+ *
+ * @param eventStream the events on which the model is trained.
+ * @param iterations the number of GIS iterations to perform.
+ * @param cutoff the number of times a feature must be seen in order to be
+ * relevant for training.
+ * @param sigma the standard deviation for the gaussian smoother; it is only
+ * handed to the trainer when greater than zero.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ * @throws IOException if the trainer reports one while training.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+     int cutoff, double sigma) throws IOException {
+   final GISTrainer gisTrainer = new GISTrainer(PRINT_MESSAGES);
+   final boolean sigmaGiven = sigma > 0;
+   if (sigmaGiven) {
+     gisTrainer.setGaussianSigma(sigma);
+   }
+   // Unlike the smoothing overloads, no smoothing switch or observation is set here.
+   return gisTrainer.trainModel(eventStream, iterations, cutoff);
+ }
+
+ /**
+ * Trains a GIS model on indexed events with a uniform prior and one thread.
+ * Progress output follows {@link #PRINT_MESSAGES}, like the stream-based
+ * overloads without a print flag, so setting that flag to false silences
+ * this overload too.
+ *
+ * @param iterations the number of GIS iterations to perform.
+ * @param indexer the object which will be used for event compilation.
+ * @param smoothing whether the created trainer uses smoothing.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
+   // null prior: the six-argument overload substitutes a UniformPrior.
+   return trainModel(iterations, indexer, PRINT_MESSAGES, smoothing, null, 1);
+ }
+
+ /**
+ * Trains a GIS model on indexed events without smoothing, with a uniform prior
+ * and one thread. Progress output follows {@link #PRINT_MESSAGES} instead of
+ * always being switched on.
+ *
+ * @param iterations the number of GIS iterations to perform.
+ * @param indexer the object which will be used for event compilation.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer) {
+   return trainModel(iterations, indexer, PRINT_MESSAGES, false, null, 1);
+ }
+
+ /**
+ * Trains a GIS model on indexed events with the given prior, no smoothing and
+ * one thread. Progress output follows {@link #PRINT_MESSAGES}.
+ *
+ * @param iterations the number of GIS iterations to perform.
+ * @param indexer the object which will be used for event compilation.
+ * @param modelPrior the prior distribution for the model; {@code null} selects
+ * a {@link UniformPrior}.
+ * @param cutoff not used: it used to be forwarded as the thread count of the
+ * six-argument overload. Presumably the indexer was already built with the
+ * desired cutoff - confirm against the DataIndexer in use.
+ * @return the newly trained model.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+     Prior modelPrior, int cutoff) {
+   return trainModel(iterations, indexer, PRINT_MESSAGES, false, modelPrior, 1);
+ }
+
+ /**
+ * Trains a GIS model on indexed events using a single thread.
+ *
+ * <p>This is the six-argument overload with the thread count fixed to 1.
+ *
+ * @param iterations the number of GIS iterations to perform.
+ * @param indexer the object which will be used for event compilation.
+ * @param printMessagesWhileTraining determines whether training status
+ * messages are printed.
+ * @param smoothing whether the created trainer uses smoothing.
+ * @param modelPrior the prior distribution for the model; {@code null} makes
+ * the six-argument overload fall back to a {@link UniformPrior}.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+     boolean printMessagesWhileTraining, boolean smoothing,
+     Prior modelPrior) {
+   final int singleThread = 1;
+   return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior,
+       singleThread);
+ }
+
+ /**
+ * Trains a GIS model on indexed events with a freshly created {@link GISTrainer}.
+ *
+ * <p>The trainer gets the smoothing switch and the class-wide smoothing observation.
+ *
+ * @param iterations the number of GIS iterations to perform.
+ * @param indexer the object which will be used for event compilation.
+ * @param printMessagesWhileTraining handed to the {@link GISTrainer}
+ * constructor; determines whether training status messages are printed.
+ * @param smoothing whether the created trainer uses smoothing.
+ * @param modelPrior the prior distribution for the model; when {@code null}, a
+ * new {@link UniformPrior} is used instead.
+ * @param threads handed to the trainer together with the indexer and prior;
+ * presumably the number of training threads - confirm in GISTrainer.
+ * @return the newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+     boolean printMessagesWhileTraining, boolean smoothing,
+     Prior modelPrior, int threads) {
+   final GISTrainer gisTrainer = new GISTrainer(printMessagesWhileTraining);
+   gisTrainer.setSmoothing(smoothing);
+   gisTrainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+
+   // Leave the caller's argument untouched and resolve the effective prior locally.
+   final Prior effectivePrior = (modelPrior != null) ? modelPrior : new UniformPrior();
+   return gisTrainer.trainModel(iterations, indexer, effectivePrior, threads);
+ }
+}
+
+
+