You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2017/03/13 16:06:09 UTC
[01/24] opennlp git commit: closes apache/opennlp#124 *Already
Merged* [Forced Update!]
Repository: opennlp
Updated Branches:
refs/heads/OPENNLP-778 a8b1d2e61 -> 11d7581b3 (forced update)
closes apache/opennlp#124 *Already Merged*
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8158f526
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8158f526
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8158f526
Branch: refs/heads/OPENNLP-778
Commit: 8158f5264c3f7828113e42c72a7d6d6e8764dae7
Parents: 9fbc2a3
Author: smarthi <sm...@apache.org>
Authored: Wed Feb 15 21:17:01 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Wed Feb 15 21:17:01 2017 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[02/24] opennlp git commit: NoJira: Adding public RepoToken to
investigate Travis coveralls build failures, this closes apache/opennlp#128
Posted by co...@apache.org.
NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/929595d2
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/929595d2
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/929595d2
Branch: refs/heads/OPENNLP-778
Commit: 929595d2f707a24cffbcbb593f5357e19385b266
Parents: 8158f52
Author: smarthi <sm...@apache.org>
Authored: Thu Feb 16 00:17:53 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Thu Feb 16 00:17:53 2017 -0500
----------------------------------------------------------------------
.travis.yml | 6 ++++++
pom.xml | 3 +++
2 files changed, 9 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/929595d2/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index b4c83ad..49d902e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,12 @@ jdk: oraclejdk8
sudo: false
+env:
+ global:
+ # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created
+ # via the "travis encrypt" command using the project repo's public key
+ - secure: "WLRKO/tD2rFN+a/HKSf9iZkaMaFE8/luXcJCXGfewoHysF7LgIJ76AN9HY50woVJykl+T/tEhK5c/+H/IKO5zH8Rvz/Q9XxPTvUTOwH7oFOHCQ66mKTvn27Z4fp+JbkPKJuhWDUzPvS/Alo3wE70UELnFRTFoRsemfNNa95uPJobfx5deOfX80mipHOn16dA1q8LuzQa6iF2HIVuh7ygLleTV0cDJyXmIlg3EbKGEitozIv0WkwALrBjLS7KmCcXTKxXqCm1Be2MFRoh9ab2bEooXlv2zRh2wT0c04RckFm1AJGpGQelXLl3NxxcRJSpIN9OTkpVUfwm28TIXk2SzdgPMrP11yFK/DPKTv0jwyk1bFrmZMMso5Y2rP6wjNEtw5ExYSpk3xebcieLJwXhCwkkWAT3DdAAeXO5z4Nf36lryjRgqvlsVF1ofqAK5Sh+qH93/TJOE+hVEj74xUT9pVaxemY61ymvSt8L21XkUsp8T5ILq9jWoaMQCaAwZIaJiHXYjQhmsrFRkNaY4cl9AUGwpHmm750uqhmoVfuJzQg5/vGMZ0LWeCgR9qsG5MG0yijE8ghExUOe7R4gcNAJW2XOfjzMTy74jdsJbsJPUeci/R4wzrXTSCQVJ5nj2LhBF6HyqPyUrIV2MB14gAIItc1LASuB1GLkGoXjIdt0HN8="
+
cache:
directories:
- $HOME/.m2
http://git-wip-us.apache.org/repos/asf/opennlp/blob/929595d2/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 268a54e..8e37452 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,6 +214,9 @@
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>${coveralls.maven.plugin}</version>
+ <configuration>
+ <repoToken>BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk</repoToken>
+ </configuration>
</plugin>
<plugin>
[22/24] opennlp git commit: Revert "OPENNLP-1002 Remove deprecated
GIS class"
Posted by co...@apache.org.
Revert "OPENNLP-1002 Remove deprecated GIS class"
This reverts commit efa257676280abd316bb677e5a8de5cb9fe1dd73.
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a9cfd7ee
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a9cfd7ee
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a9cfd7ee
Branch: refs/heads/OPENNLP-778
Commit: a9cfd7ee6e576dfb8289969950199d33177dc7ce
Parents: efa2576
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Mar 10 17:22:28 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Mar 10 17:22:28 2017 +0100
----------------------------------------------------------------------
.../main/java/opennlp/tools/ml/maxent/GIS.java | 303 +++++++++++++++++++
1 file changed, 303 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/a9cfd7ee/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
new file mode 100644
index 0000000..97c214d
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.maxent;
+
+import java.io.IOException;
+
+import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.Prior;
+import opennlp.tools.ml.model.UniformPrior;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * A Factory class which uses instances of GISTrainer to create and train
+ * GISModels.
+ * @deprecated use {@link GISTrainer}
+ */
+@Deprecated
+public class GIS extends AbstractEventTrainer {
+
+ public static final String MAXENT_VALUE = "MAXENT";
+
+ /**
+ * Set this to false if you don't want messages about the progress of model
+ * training displayed. Alternately, you can use the overloaded version of
+ * trainModel() to conditionally enable progress messages.
+ */
+ public static boolean PRINT_MESSAGES = true;
+
+ /**
+ * If we are using smoothing, this is used as the "number" of times we want
+ * the trainer to imagine that it saw a feature that it actually didn't see.
+ * Defaulted to 0.1.
+ */
+ private static final double SMOOTHING_OBSERVATION = 0.1;
+
+ private static final String SMOOTHING_PARAM = "smoothing";
+ private static final boolean SMOOTHING_DEFAULT = false;
+
+ public GIS() {
+ }
+
+ public GIS(TrainingParameters parameters) {
+ super(parameters);
+ }
+
+ public boolean isValid() {
+
+ if (!super.isValid()) {
+ return false;
+ }
+
+ String algorithmName = getAlgorithm();
+
+ return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
+ }
+
+ public boolean isSortAndMerge() {
+ return true;
+ }
+
+ public AbstractModel doTrain(DataIndexer indexer) throws IOException {
+ int iterations = getIterations();
+
+ AbstractModel model;
+
+ boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
+ boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
+ int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
+
+ model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
+
+ return model;
+ }
+
+ // << members related to AbstractEventTrainer
+
+ /**
+ * Train a model using the GIS algorithm, assuming 100 iterations and no
+ * cutoff.
+ *
+ * @param eventStream
+ * The EventStream holding the data on which this model will be
+ * trained.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
+ return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
+ }
+
+ /**
+ * Train a model using the GIS algorithm, assuming 100 iterations and no
+ * cutoff.
+ *
+ * @param eventStream
+ * The EventStream holding the data on which this model will be
+ * trained.
+ * @param smoothing
+ * Defines whether the created trainer will use smoothing while
+ * training the model.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
+ throws IOException {
+ return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param eventStream
+ * The EventStream holding the data on which this model will be
+ * trained.
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param cutoff
+ * The number of times a feature must be seen in order to be relevant
+ * for training.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+ int cutoff) throws IOException {
+ return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param eventStream
+ * The EventStream holding the data on which this model will be
+ * trained.
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param cutoff
+ * The number of times a feature must be seen in order to be relevant
+ * for training.
+ * @param smoothing
+ * Defines whether the created trainer will use smoothing while
+ * training the model.
+ * @param printMessagesWhileTraining
+ * Determines whether training status messages are written to STDOUT.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+ int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
+ throws IOException {
+ GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
+ trainer.setSmoothing(smoothing);
+ trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+ return trainer.trainModel(eventStream, iterations, cutoff);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param eventStream
+ * The EventStream holding the data on which this model will be
+ * trained.
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param cutoff
+ * The number of times a feature must be seen in order to be relevant
+ * for training.
+ * @param sigma
+ * The standard deviation for the gaussian smoother.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+ int cutoff, double sigma) throws IOException {
+ GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
+ if (sigma > 0) {
+ trainer.setGaussianSigma(sigma);
+ }
+ return trainer.trainModel(eventStream, iterations, cutoff);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param indexer
+ * The object which will be used for event compilation.
+ * @param smoothing
+ * Defines whether the created trainer will use smoothing while
+ * training the model.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
+ return trainModel(iterations, indexer, true, smoothing, null, 1);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param indexer
+ * The object which will be used for event compilation.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer) {
+ return trainModel(iterations, indexer, true, false, null, 1);
+ }
+
+ /**
+ * Train a model using the GIS algorithm with the specified number of
+ * iterations, data indexer, and prior.
+ *
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param indexer
+ * The object which will be used for event compilation.
+ * @param modelPrior
+ * The prior distribution for the model.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+ Prior modelPrior, int cutoff) {
+ return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param indexer
+ * The object which will be used for event compilation.
+ * @param printMessagesWhileTraining
+ * Determines whether training status messages are written to STDOUT.
+ * @param smoothing
+ * Defines whether the created trainer will use smoothing while
+ * training the model.
+ * @param modelPrior
+ * The prior distribution for the model.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+ boolean printMessagesWhileTraining, boolean smoothing,
+ Prior modelPrior) {
+ return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
+ }
+
+ /**
+ * Train a model using the GIS algorithm.
+ *
+ * @param iterations
+ * The number of GIS iterations to perform.
+ * @param indexer
+ * The object which will be used for event compilation.
+ * @param printMessagesWhileTraining
+ * Determines whether training status messages are written to STDOUT.
+ * @param smoothing
+ * Defines whether the created trainer will use smoothing while
+ * training the model.
+ * @param modelPrior
+ * The prior distribution for the model.
+ * @return The newly trained model, which can be used immediately or saved to
+ * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+ */
+ public static GISModel trainModel(int iterations, DataIndexer indexer,
+ boolean printMessagesWhileTraining, boolean smoothing,
+ Prior modelPrior, int threads) {
+ GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
+ trainer.setSmoothing(smoothing);
+ trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+ if (modelPrior == null) {
+ modelPrior = new UniformPrior();
+ }
+ return trainer.trainModel(iterations, indexer, modelPrior, threads);
+ }
+}
+
+
+
[24/24] opennlp git commit: OPENNLP-778: Add initial LanguageDetector
interface and Language class
Posted by co...@apache.org.
OPENNLP-778: Add initial LanguageDetector interface and Language class
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/11d7581b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/11d7581b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/11d7581b
Branch: refs/heads/OPENNLP-778
Commit: 11d7581b3bd36a1202a1e12ceb014796f424c56e
Parents: cb9b00a
Author: William D C M SILVA <co...@apache.org>
Authored: Wed Feb 15 10:03:28 2017 -0200
Committer: William D C M SILVA <co...@apache.org>
Committed: Mon Mar 13 13:04:26 2017 -0300
----------------------------------------------------------------------
.../java/opennlp/tools/langdetect/Language.java | 39 ++++++++++++++++++++
.../tools/langdetect/LanguageDetector.java | 33 +++++++++++++++++
2 files changed, 72 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/11d7581b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
new file mode 100644
index 0000000..773201f
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.langdetect;
+
+/**
+ * Class for holding the document language and its confidence
+ */
+public class Language {
+ private final String lang;
+ private final double confidence;
+
+ public Language(String lang, double confidence) {
+ this.lang = lang;
+ this.confidence = confidence;
+ }
+
+ public String getLang() {
+ return lang;
+ }
+
+ public double getConfidence() {
+ return confidence;
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/11d7581b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
new file mode 100644
index 0000000..ca897fd
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.langdetect;
+
+import java.util.Set;
+
+/**
+ * The interface for language detectors, which detect the language of a given text.
+ */
+public interface LanguageDetector {
+
+ Language[] detectLanguage(CharSequence content);
+
+ Set<String> getSupportedLanguages();
+
+ String getLanguageCoding();
+
+}
[17/24] opennlp git commit: OPENNLP-989: Fix validation of CONT after
START with different type
Posted by co...@apache.org.
OPENNLP-989: Fix validation of CONT after START with different type
This closes #126
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ebf10817
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ebf10817
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ebf10817
Branch: refs/heads/OPENNLP-778
Commit: ebf108170e9a6215176174d84313a0564ddde0c4
Parents: f60bfa2
Author: Peter Thygesen <pe...@gmail.com>
Authored: Fri Feb 17 15:17:13 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Mar 2 21:25:28 2017 +0100
----------------------------------------------------------------------
.../opennlp/tools/namefind/NameFinderSequenceValidator.java | 5 +++--
.../test/java/opennlp/tools/eval/Conll02NameFinderEval.java | 8 ++++----
.../tools/namefind/NameFinderSequenceValidatorTest.java | 2 --
3 files changed, 7 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebf10817/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
index d42e8c5..5143468 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
@@ -35,8 +35,9 @@ public class NameFinderSequenceValidator implements
return false;
} else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
return false;
- } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE)) {
- // if it is continue, we have to check if previous match was of the same type
+ } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
+ outcomesSequence[li].endsWith(NameFinderME.START)) {
+ // if it is continue or start, we have to check if previous match was of the same type
String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
String nameType = NameFinderME.extractNameType(outcome);
if (previousNameType != null || nameType != null ) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebf10817/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
index d1a71cf..abe53aa 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
@@ -287,9 +287,9 @@ public class Conll02NameFinderEval {
TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
combinedType);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6728164867517175d);
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6673209028459275d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6985893619774816d);
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6984085910208306d);
}
@Test
@@ -507,9 +507,9 @@ public class Conll02NameFinderEval {
TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
combinedType);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.706765154179857d);
+ eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.707400023454908d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7583580194667795d);
+ eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7576868829337094d);
}
@Test
http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebf10817/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
index 35752c1..1b2f6ed 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
@@ -17,7 +17,6 @@
package opennlp.tools.namefind;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -55,7 +54,6 @@ public class NameFinderSequenceValidatorTest {
}
- @Ignore
@Test
public void testContinueAfterStartAndNotSameType() {
[20/24] opennlp git commit: OPENNLP-125: Make POS Tagger feature
generation configurable
Posted by co...@apache.org.
OPENNLP-125: Make POS Tagger feature generation configurable
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/530432ce
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/530432ce
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/530432ce
Branch: refs/heads/OPENNLP-778
Commit: 530432ce029cdfc7ab479b0002281f0f719ae724
Parents: 347babe
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Feb 9 18:54:27 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Mar 7 15:59:16 2017 +0100
----------------------------------------------------------------------
.../namefind/TokenNameFinderTrainerTool.java | 2 +-
.../postag/POSTaggerCrossValidatorTool.java | 10 +-
.../cmdline/postag/POSTaggerTrainerTool.java | 26 +--
.../tools/cmdline/postag/TrainingParams.java | 13 +-
.../postag/ConfigurablePOSContextGenerator.java | 105 +++++++++++
.../opennlp/tools/postag/POSDictionary.java | 8 +-
.../java/opennlp/tools/postag/POSModel.java | 40 +++--
.../tools/postag/POSTaggerCrossValidator.java | 44 ++---
.../opennlp/tools/postag/POSTaggerFactory.java | 179 ++++++++++++++++++-
.../tools/util/featuregen/GeneratorFactory.java | 12 ++
.../featuregen/PosTaggerFeatureGenerator.java | 62 +++++++
.../tools/postag/pos-default-features.xml | 38 ++++
.../ConfigurablePOSContextGeneratorTest.java | 55 ++++++
.../tools/postag/DummyPOSTaggerFactory.java | 14 +-
.../tools/postag/POSTaggerFactoryTest.java | 11 +-
15 files changed, 534 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index 5bb18d2..4fb8cb9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -67,7 +67,7 @@ public final class TokenNameFinderTrainerTool
return null;
}
- static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
+ public static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
byte[] featureGeneratorBytes = null;
// load descriptor file into memory
if (featureGenDescriptorFile != null) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
index d91d4ee..67ad2b9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
@@ -22,10 +22,12 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.util.Map;
import opennlp.tools.cmdline.AbstractCrossValidatorTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
import opennlp.tools.cmdline.params.CVParams;
import opennlp.tools.cmdline.params.FineGrainedEvaluatorParams;
import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool.CVToolParams;
@@ -75,10 +77,16 @@ public final class POSTaggerCrossValidatorTool
}
}
+ Map<String, Object> resources = TokenNameFinderTrainerTool.loadResources(
+ params.getResources(), params.getFeaturegen());
+
+ byte[] featureGeneratorBytes =
+ TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
+
POSTaggerCrossValidator validator;
try {
validator = new POSTaggerCrossValidator(params.getLang(), mlParams,
- params.getDict(), params.getNgram(), params.getTagDictCutoff(),
+ params.getDict(), featureGeneratorBytes, resources, params.getTagDictCutoff(),
params.getFactory(), missclassifiedListener, reportListener);
validator.evaluate(sampleStream, params.getFolds());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
index 1e6fb54..b922176 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
@@ -19,13 +19,14 @@ package opennlp.tools.cmdline.postag;
import java.io.File;
import java.io.IOException;
+import java.util.Map;
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.cmdline.postag.POSTaggerTrainerTool.TrainerToolParams;
-import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.postag.MutableTagDictionary;
import opennlp.tools.postag.POSModel;
@@ -66,25 +67,16 @@ public final class POSTaggerTrainerTool
File modelOutFile = params.getModel();
CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);
- Dictionary ngramDict = null;
+ Map<String, Object> resources = TokenNameFinderTrainerTool.loadResources(
+ params.getResources(), params.getFeaturegen());
- Integer ngramCutoff = params.getNgram();
-
- if (ngramCutoff != null) {
- System.err.print("Building ngram dictionary ... ");
- try {
- ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff);
- sampleStream.reset();
- } catch (IOException e) {
- throw new TerminateToolException(-1,
- "IO error while building NGram Dictionary: " + e.getMessage(), e);
- }
- System.err.println("done");
- }
+ byte[] featureGeneratorBytes =
+ TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
POSTaggerFactory postaggerFactory;
try {
- postaggerFactory = POSTaggerFactory.create(params.getFactory(), ngramDict, null);
+ postaggerFactory = POSTaggerFactory.create(params.getFactory(), featureGeneratorBytes,
+ resources, null);
} catch (InvalidFormatException e) {
throw new TerminateToolException(-1, e.getMessage(), e);
}
@@ -95,7 +87,7 @@ public final class POSTaggerTrainerTool
.createTagDictionary(params.getDict()));
} catch (IOException e) {
throw new TerminateToolException(-1,
- "IO error while loading POS Dictionary: " + e.getMessage(), e);
+ "IO error while loading POS Dictionary", e);
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
index 690b359..31d5e48 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
@@ -29,14 +29,17 @@ import opennlp.tools.cmdline.params.BasicTrainingParams;
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
- @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
+ @ParameterDescription(valueName = "featuregenFile", description = "The feature generator descriptor file")
@OptionalParameter
- File getDict();
+ File getFeaturegen();
+
+ @ParameterDescription(valueName = "resourcesDir", description = "The resources directory")
+ @OptionalParameter
+ File getResources();
- @ParameterDescription(valueName = "cutoff",
- description = "NGram cutoff. If not specified will not create ngram dictionary.")
+ @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
@OptionalParameter
- Integer getNgram();
+ File getDict();
@ParameterDescription(valueName = "tagDictCutoff",
description = "TagDictionary cutoff. If specified will create/expand a mutable TagDictionary")
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
new file mode 100644
index 0000000..e6b65df
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import opennlp.tools.util.Cache;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+
+/**
+ * A context generator for the POS Tagger.
+ */
+public class ConfigurablePOSContextGenerator implements POSContextGenerator {
+
+ private Cache<String, String[]> contextsCache;
+ private Object wordsKey;
+
+ private final AdaptiveFeatureGenerator featureGenerator;
+
+ /**
+ * Initializes the current instance.
+ *
+ * @param cacheSize
+ */
+ public ConfigurablePOSContextGenerator(int cacheSize, AdaptiveFeatureGenerator featureGenerator) {
+ this.featureGenerator = Objects.requireNonNull(featureGenerator, "featureGenerator must not be null");
+
+ if (cacheSize > 0) {
+ contextsCache = new Cache<>(cacheSize);
+ }
+ }
+
+ /**
+ * Initializes the current instance.
+ *
+ */
+ public ConfigurablePOSContextGenerator(AdaptiveFeatureGenerator featureGenerator) {
+ this(0, featureGenerator);
+ }
+
+ /**
+ * Returns the context for making a pos tag decision at the specified token index
+ * given the specified tokens and previous tags.
+ * @param index The index of the token for which the context is provided.
+ * @param tokens The tokens in the sentence.
+ * @param tags The tags assigned to the previous words in the sentence.
+ * @return The context for making a pos tag decision at the specified token index
+ * given the specified tokens and previous tags.
+ */
+ public String[] getContext(int index, String[] tokens, String[] tags,
+ Object[] additionalContext) {
+
+ String tagprev = null;
+ String tagprevprev = null;
+
+ if (index - 1 >= 0) {
+ tagprev = tags[index - 1];
+
+ if (index - 2 >= 0) {
+ tagprevprev = tags[index - 2];
+ }
+ }
+
+ String cacheKey = index + tagprev + tagprevprev;
+ if (contextsCache != null) {
+ if (wordsKey == tokens) {
+ String[] cachedContexts = contextsCache.get(cacheKey);
+ if (cachedContexts != null) {
+ return cachedContexts;
+ }
+ }
+ else {
+ contextsCache.clear();
+ wordsKey = tokens;
+ }
+ }
+
+ List<String> e = new ArrayList<>();
+
+ featureGenerator.createFeatures(e, tokens, index, tags);
+
+ String[] contexts = e.toArray(new String[e.size()]);
+ if (contextsCache != null) {
+ contextsCache.put(cacheKey, contexts);
+ }
+ return contexts;
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index 5f5eb25..90d51c1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -32,12 +32,13 @@ import opennlp.tools.dictionary.serializer.Entry;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
import opennlp.tools.util.StringUtil;
+import opennlp.tools.util.model.SerializableArtifact;
/**
* Provides a means of determining which tags are valid for a particular word
* based on a tag dictionary read from a file.
*/
-public class POSDictionary implements Iterable<String>, MutableTagDictionary {
+public class POSDictionary implements Iterable<String>, MutableTagDictionary, SerializableArtifact {
private Map<String, String[]> dictionary;
@@ -265,4 +266,9 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary {
public boolean isCaseSensitive() {
return this.caseSensitive;
}
+
+ @Override
+ public Class<?> getArtifactSerializerClass() {
+ return POSTaggerFactory.POSDictionarySerializer.class;
+ }
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
index bfe5c90..f81092b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Map;
+import java.util.Objects;
import java.util.Properties;
import opennlp.tools.dictionary.Dictionary;
@@ -32,6 +33,7 @@ import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.BaseModel;
+import opennlp.tools.util.model.ByteArraySerializer;
/**
* The {@link POSModel} is the model used
@@ -42,18 +44,23 @@ import opennlp.tools.util.model.BaseModel;
public final class POSModel extends BaseModel {
private static final String COMPONENT_NAME = "POSTaggerME";
-
static final String POS_MODEL_ENTRY_NAME = "pos.model";
+ static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
public POSModel(String languageCode, SequenceClassificationModel<String> posModel,
Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
- if (posModel == null)
- throw new IllegalArgumentException("The maxentPosModel param must not be null!");
+ artifactMap.put(POS_MODEL_ENTRY_NAME,
+ Objects.requireNonNull(posModel, "posModel must not be null"));
+
+ artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, posFactory.getFeatureGenerator());
+
+ for (Map.Entry<String, Object> resource : posFactory.getResources().entrySet()) {
+ artifactMap.put(resource.getKey(), resource.getValue());
+ }
- artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
// TODO: This fails probably for the sequence model ... ?!
// checkArtifactMap();
}
@@ -68,13 +75,18 @@ public final class POSModel extends BaseModel {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
- if (posModel == null)
- throw new IllegalArgumentException("The maxentPosModel param must not be null!");
+ Objects.requireNonNull(posModel, "posModel must not be null");
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
manifest.setProperty(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
+ artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, posFactory.getFeatureGenerator());
+
+ for (Map.Entry<String, Object> resource : posFactory.getResources().entrySet()) {
+ artifactMap.put(resource.getKey(), resource.getValue());
+ }
+
checkArtifactMap();
}
@@ -96,14 +108,6 @@ public final class POSModel extends BaseModel {
}
@Override
- @SuppressWarnings("rawtypes")
- protected void createArtifactSerializers(
- Map<String, ArtifactSerializer> serializers) {
-
- super.createArtifactSerializers(serializers);
- }
-
- @Override
protected void validateArtifactMap() throws InvalidFormatException {
super.validateArtifactMap();
@@ -114,6 +118,7 @@ public final class POSModel extends BaseModel {
/**
* @deprecated use getPosSequenceModel instead. This method will be removed soon.
+ * Only required for Parser 1.5.x backward compatibility. Newer models don't need this anymore.
*/
@Deprecated
public MaxentModel getPosModel() {
@@ -151,6 +156,13 @@ public final class POSModel extends BaseModel {
return (POSTaggerFactory) this.toolFactory;
}
+ @Override
+ protected void createArtifactSerializers(Map<String, ArtifactSerializer> serializers) {
+ super.createArtifactSerializers(serializers);
+
+ serializers.put("featuregen", new ByteArraySerializer());
+ }
+
/**
* Retrieves the ngram dictionary.
*
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
index 3010e03..a35bbb6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
@@ -19,6 +19,7 @@ package opennlp.tools.postag;
import java.io.File;
import java.io.IOException;
+import java.util.Map;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.ObjectStream;
@@ -32,7 +33,8 @@ public class POSTaggerCrossValidator {
private final TrainingParameters params;
- private Integer ngramCutoff;
+ private byte[] featureGeneratorBytes;
+ private Map<String, Object> resources;
private Mean wordAccuracy = new Mean();
private POSTaggerEvaluationMonitor[] listeners;
@@ -51,18 +53,21 @@ public class POSTaggerCrossValidator {
* the tag and the ngram dictionaries.
*/
public POSTaggerCrossValidator(String languageCode,
- TrainingParameters trainParam, File tagDictionary,
- Integer ngramCutoff, Integer tagdicCutoff, String factoryClass,
- POSTaggerEvaluationMonitor... listeners) {
+ TrainingParameters trainParam, File tagDictionary,
+ byte[] featureGeneratorBytes, Map<String, Object> resources,
+ Integer tagdicCutoff, String factoryClass,
+ POSTaggerEvaluationMonitor... listeners) {
this.languageCode = languageCode;
this.params = trainParam;
- this.ngramCutoff = ngramCutoff;
+ this.featureGeneratorBytes = featureGeneratorBytes;
+ this.resources = resources;
this.listeners = listeners;
this.factoryClassName = factoryClass;
this.tagdicCutoff = tagdicCutoff;
this.tagDictionaryFile = tagDictionary;
}
+
/**
* Creates a {@link POSTaggerCrossValidator} using the given
* {@link POSTaggerFactory}.
@@ -74,7 +79,6 @@ public class POSTaggerCrossValidator {
this.params = trainParam;
this.listeners = listeners;
this.factory = factory;
- this.ngramCutoff = null;
this.tagdicCutoff = null;
}
@@ -98,33 +102,18 @@ public class POSTaggerCrossValidator {
CrossValidationPartitioner.TrainingSampleStream<POSSample> trainingSampleStream = partitioner
.next();
- if (this.factory == null) {
- this.factory = POSTaggerFactory.create(this.factoryClassName, null,
- null);
- }
-
- Dictionary ngramDict = this.factory.getDictionary();
- if (ngramDict == null) {
- if (this.ngramCutoff != null) {
- System.err.print("Building ngram dictionary ... ");
- ngramDict = POSTaggerME.buildNGramDictionary(trainingSampleStream,
- this.ngramCutoff);
- trainingSampleStream.reset();
- System.err.println("done");
- }
- this.factory.setDictionary(ngramDict);
- }
if (this.tagDictionaryFile != null
&& this.factory.getTagDictionary() == null) {
this.factory.setTagDictionary(this.factory
.createTagDictionary(tagDictionaryFile));
}
+
+ TagDictionary dict = null;
if (this.tagdicCutoff != null) {
- TagDictionary dict = this.factory.getTagDictionary();
+ dict = this.factory.getTagDictionary();
if (dict == null) {
dict = this.factory.createEmptyTagDictionary();
- this.factory.setTagDictionary(dict);
}
if (dict instanceof MutableTagDictionary) {
POSTaggerME.populatePOSDictionary(trainingSampleStream, (MutableTagDictionary)dict,
@@ -136,6 +125,12 @@ public class POSTaggerCrossValidator {
trainingSampleStream.reset();
}
+ if (this.factory == null) {
+ this.factory = POSTaggerFactory.create(this.factoryClassName, null, null);
+ }
+
+ factory.init(featureGeneratorBytes, resources, dict);
+
POSModel model = POSTaggerME.train(languageCode, trainingSampleStream,
params, this.factory);
@@ -148,7 +143,6 @@ public class POSTaggerCrossValidator {
if (this.tagdicCutoff != null) {
this.factory.setTagDictionary(null);
}
-
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index eb5466e..37143c9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -17,6 +17,8 @@
package opennlp.tools.postag;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -29,10 +31,15 @@ import java.util.Set;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.Version;
import opennlp.tools.util.ext.ExtensionLoader;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
+import opennlp.tools.util.featuregen.GeneratorFactory;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.UncloseableInputStream;
@@ -44,7 +51,10 @@ public class POSTaggerFactory extends BaseToolFactory {
private static final String TAG_DICTIONARY_ENTRY_NAME = "tags.tagdict";
private static final String NGRAM_DICTIONARY_ENTRY_NAME = "ngram.dictionary";
+
protected Dictionary ngramDictionary;
+ private byte[] featureGeneratorBytes;
+ private Map<String, Object> resources;
protected TagDictionary posDictionary;
/**
@@ -60,23 +70,127 @@ public class POSTaggerFactory extends BaseToolFactory {
*
* @param ngramDictionary
* @param posDictionary
+ *
+ * @deprecated this constructor is here for backward compatibility and
+ * is not functional anymore in the training of 1.8.x series models
*/
- public POSTaggerFactory(Dictionary ngramDictionary,
- TagDictionary posDictionary) {
+ @Deprecated
+ public POSTaggerFactory(Dictionary ngramDictionary, TagDictionary posDictionary) {
this.init(ngramDictionary, posDictionary);
+
+ // TODO: This could be made functional by creating some default feature generation
+ // which uses the dictionary ...
+ }
+
+ public POSTaggerFactory(byte[] featureGeneratorBytes, final Map<String, Object> resources,
+ TagDictionary posDictionary) {
+ this.featureGeneratorBytes = featureGeneratorBytes;
+
+ if (this.featureGeneratorBytes == null) {
+ this.featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+ }
+
+ this.resources = resources;
+ this.posDictionary = posDictionary;
}
+ @Deprecated // will be removed when only 8 series models are supported
protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) {
this.ngramDictionary = ngramDictionary;
this.posDictionary = posDictionary;
}
+ protected void init(byte[] featureGeneratorBytes, final Map<String, Object> resources,
+ TagDictionary posDictionary) {
+ this.featureGeneratorBytes = featureGeneratorBytes;
+ this.resources = resources;
+ this.posDictionary = posDictionary;
+ }
+ private static byte[] loadDefaultFeatureGeneratorBytes() {
+
+ ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+ try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
+ "/opennlp/tools/postag/pos-default-features.xml")) {
+
+ if (in == null) {
+ throw new IllegalStateException("Classpath must contain pos-default-features.xml file!");
+ }
+
+ byte[] buf = new byte[1024];
+ int len;
+ while ((len = in.read(buf)) > 0) {
+ bytes.write(buf, 0, len);
+ }
+ }
+ catch (IOException e) {
+ throw new IllegalStateException("Failed reading from pos-default-features.xml file on classpath!");
+ }
+
+ return bytes.toByteArray();
+ }
+
+ /**
+ * Creates the {@link AdaptiveFeatureGenerator}. Usually this
+ * is a set of generators contained in the {@link AggregatedFeatureGenerator}.
+ *
+ * Note:
+ * The generators are created on every call to this method.
+ *
+ * @return the feature generator or null if there is no descriptor in the model
+ */
+ public AdaptiveFeatureGenerator createFeatureGenerators() {
+
+ if (featureGeneratorBytes == null && artifactProvider != null) {
+ featureGeneratorBytes = artifactProvider.getArtifact(
+ POSModel.GENERATOR_DESCRIPTOR_ENTRY_NAME);
+ }
+
+ if (featureGeneratorBytes == null) {
+ featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+ }
+
+ InputStream descriptorIn = new ByteArrayInputStream(featureGeneratorBytes);
+
+ AdaptiveFeatureGenerator generator;
+ try {
+ generator = GeneratorFactory.create(descriptorIn, key -> {
+ if (artifactProvider != null) {
+ return artifactProvider.getArtifact(key);
+ }
+ else {
+ return resources.get(key);
+ }
+ });
+ } catch (InvalidFormatException e) {
+ // It is assumed that the creation of the feature generation does not
+ // fail after it succeeded once during model loading.
+
+ // But it might still be possible that such an exception is thrown,
+ // in this case the caller should not be forced to handle the exception
+ // and a Runtime Exception is thrown instead.
+
+ // If the re-creation of the feature generation fails it is assumed
+ // that this can only be caused by a programming mistake and therefore
+ // throwing a Runtime Exception is reasonable
+
+ throw new IllegalStateException(); // FeatureGeneratorCreationError(e);
+ } catch (IOException e) {
+ throw new IllegalStateException("Reading from mem cannot result in an I/O error", e);
+ }
+
+ return generator;
+ }
+
@Override
@SuppressWarnings("rawtypes")
public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
Map<String, ArtifactSerializer> serializers = super.createArtifactSerializersMap();
- POSDictionarySerializer.register(serializers);
- // the ngram Dictionary uses a base serializer, we don't need to add it here.
+
+ // NOTE: This is only needed for old models and this if can be removed if support is dropped
+ if (Version.currentVersion().getMinor() < 8) {
+ POSDictionarySerializer.register(serializers);
+ }
+
return serializers;
}
@@ -111,18 +225,37 @@ public class POSTaggerFactory extends BaseToolFactory {
this.posDictionary = dictionary;
}
+ protected Map<String, Object> getResources() {
+
+
+ if (resources != null) {
+ return resources;
+ }
+
+ return Collections.emptyMap();
+ }
+
+ protected byte[] getFeatureGenerator() {
+ return featureGeneratorBytes;
+ }
+
public TagDictionary getTagDictionary() {
if (this.posDictionary == null && artifactProvider != null)
this.posDictionary = artifactProvider.getArtifact(TAG_DICTIONARY_ENTRY_NAME);
return this.posDictionary;
}
+ /**
+ * @deprecated this will be reduced in visibility and later removed
+ */
+ @Deprecated
public Dictionary getDictionary() {
if (this.ngramDictionary == null && artifactProvider != null)
this.ngramDictionary = artifactProvider.getArtifact(NGRAM_DICTIONARY_ENTRY_NAME);
return this.ngramDictionary;
}
+ @Deprecated
public void setDictionary(Dictionary ngramDict) {
if (artifactProvider != null) {
throw new IllegalStateException(
@@ -132,10 +265,14 @@ public class POSTaggerFactory extends BaseToolFactory {
}
public POSContextGenerator getPOSContextGenerator() {
- return new DefaultPOSContextGenerator(0, getDictionary());
+ return getPOSContextGenerator(0);
}
public POSContextGenerator getPOSContextGenerator(int cacheSize) {
+ if (Version.currentVersion().getMinor() >= 8) {
+ return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators());
+ }
+
return new DefaultPOSContextGenerator(cacheSize, getDictionary());
}
@@ -143,7 +280,9 @@ public class POSTaggerFactory extends BaseToolFactory {
return new DefaultPOSSequenceValidator(getTagDictionary());
}
- static class POSDictionarySerializer implements ArtifactSerializer<POSDictionary> {
+ // TODO: This should not be done anymore for 8 models, they can just
+ // use the SerializableArtifact interface
+ public static class POSDictionarySerializer implements ArtifactSerializer<POSDictionary> {
public POSDictionary create(InputStream in) throws IOException {
return POSDictionary.create(new UncloseableInputStream(in));
@@ -218,6 +357,7 @@ public class POSTaggerFactory extends BaseToolFactory {
}
+ @Deprecated
public static POSTaggerFactory create(String subclassName,
Dictionary ngramDictionary, TagDictionary posDictionary)
throws InvalidFormatException {
@@ -233,11 +373,34 @@ public class POSTaggerFactory extends BaseToolFactory {
} catch (Exception e) {
String msg = "Could not instantiate the " + subclassName
+ ". The initialization throw an exception.";
- System.err.println(msg);
- e.printStackTrace();
throw new InvalidFormatException(msg, e);
}
+ }
+
+ public static POSTaggerFactory create(String subclassName, byte[] featureGeneratorBytes,
+ Map<String, Object> resources, TagDictionary posDictionary)
+ throws InvalidFormatException {
+
+ POSTaggerFactory theFactory;
+
+ if (subclassName == null) {
+ // will create the default factory
+ theFactory = new POSTaggerFactory(null, posDictionary);
+ }
+ else {
+ try {
+ theFactory = ExtensionLoader.instantiateExtension(
+ POSTaggerFactory.class, subclassName);
+ } catch (Exception e) {
+ String msg = "Could not instantiate the " + subclassName
+ + ". The initialization throw an exception.";
+ throw new InvalidFormatException(msg, e);
+ }
+ }
+
+ theFactory.init(featureGeneratorBytes, resources, posDictionary);
+ return theFactory;
}
public TagDictionary createEmptyTagDictionary() {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index ef08cfb..a1ac72b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -489,6 +489,17 @@ public class GeneratorFactory {
}
}
+ static class PosTaggerFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) {
+ return new PosTaggerFeatureGenerator();
+ }
+
+ static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("postagger", new PosTaggerFeatureGeneratorFactory());
+ }
+ }
+
/**
* @see WindowFeatureGenerator
*/
@@ -658,6 +669,7 @@ public class GeneratorFactory {
TokenFeatureGeneratorFactory.register(factories);
BigramNameFeatureGeneratorFactory.register(factories);
TokenPatternFeatureGeneratorFactory.register(factories);
+ PosTaggerFeatureGeneratorFactory.register(factories);
PrefixFeatureGeneratorFactory.register(factories);
SuffixFeatureGeneratorFactory.register(factories);
WindowFeatureGeneratorFactory.register(factories);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
new file mode 100644
index 0000000..c32baec
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.List;
+
+public class PosTaggerFeatureGenerator implements AdaptiveFeatureGenerator {
+
+ private final String SB = "S=begin";
+
+ @Override
+ public void createFeatures(List<String> features, String[] tokens, int index,
+ String[] tags) {
+
+ String prev, prevprev = null;
+ String tagprev, tagprevprev;
+ tagprev = tagprevprev = null;
+
+ if (index - 1 >= 0) {
+ prev = tokens[index - 1];
+ tagprev = tags[index - 1];
+
+ if (index - 2 >= 0) {
+ prevprev = tokens[index - 2];
+ tagprevprev = tags[index - 2];
+ }
+ else {
+ prevprev = SB;
+ }
+ }
+ else {
+ prev = SB;
+ }
+
+ // add the words and pos's of the surrounding context
+ if (prev != null) {
+ if (tagprev != null) {
+ features.add("t=" + tagprev);
+ }
+ if (prevprev != null) {
+ if (tagprevprev != null) {
+ features.add("t2=" + tagprevprev + "," + tagprev);
+ }
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
new file mode 100644
index 0000000..0be1fc8
--- /dev/null
+++ b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -0,0 +1,38 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+
+<!-- Default pos tagger feature generator configuration -->
+<generators>
+ <cache>
+ <generators>
+ <definition/>
+ <suffix/>
+ <prefix/>
+ <window prevLength = "2" nextLength = "2">
+ <token/>
+ </window>
+ <window prevLength = "2" nextLength = "2">
+ <sentence begin="true" end="false"/>
+ </window>
+ <tokenclass/>
+ <postagger/>
+ </generators>
+ </cache>
+</generators>
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
new file mode 100644
index 0000000..f00e855
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+import opennlp.tools.util.featuregen.TokenFeatureGenerator;
+
+public class ConfigurablePOSContextGeneratorTest {
+
+ private void testContextGeneration(int cacheSize) {
+ AdaptiveFeatureGenerator fg = new TokenFeatureGenerator();
+ ConfigurablePOSContextGenerator cg = new ConfigurablePOSContextGenerator(cacheSize, fg);
+
+ String[] tokens = new String[] {"a", "b", "c", "d", "e"};
+ String[] tags = new String[] {"t_a", "t_b", "t_c", "t_d", "t_e"};
+
+ cg.getContext(0, tokens, tags, null);
+
+ Assert.assertEquals(1, cg.getContext(0, tokens, tags, null).length);
+ Assert.assertEquals("w=a", cg.getContext(0, tokens, tags, null)[0]);
+ Assert.assertEquals("w=b", cg.getContext(1, tokens, tags, null)[0]);
+ Assert.assertEquals("w=c", cg.getContext(2, tokens, tags, null)[0]);
+ Assert.assertEquals("w=d", cg.getContext(3, tokens, tags, null)[0]);
+ Assert.assertEquals("w=e", cg.getContext(4, tokens, tags, null)[0]);
+ }
+
+ @Test
+ public void testWithoutCache() {
+ testContextGeneration(0);
+ }
+
+ @Test
+ public void testWithCache() {
+ testContextGeneration(3);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
index e0ce2a6..91228fc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
@@ -36,8 +36,8 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
public DummyPOSTaggerFactory() {
}
- public DummyPOSTaggerFactory(Dictionary ngramDictionary, DummyPOSDictionary posDictionary) {
- super(ngramDictionary, null);
+ public DummyPOSTaggerFactory(DummyPOSDictionary posDictionary) {
+ super(null, null, null);
this.dict = posDictionary;
}
@@ -81,7 +81,7 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
}
- static class DummyPOSDictionarySerializer implements ArtifactSerializer<DummyPOSDictionary> {
+ public static class DummyPOSDictionarySerializer implements ArtifactSerializer<DummyPOSDictionary> {
public DummyPOSDictionary create(InputStream in) throws IOException {
return DummyPOSDictionary.create(new UncloseableInputStream(in));
@@ -106,6 +106,9 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
private POSDictionary dict;
+ public DummyPOSDictionary() {
+ }
+
public DummyPOSDictionary(POSDictionary dict) {
this.dict = dict;
}
@@ -123,6 +126,9 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
return dict.getTags(word);
}
+ @Override
+ public Class<?> getArtifactSerializerClass() {
+ return DummyPOSDictionarySerializer.class;
+ }
}
-
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/530432ce/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
index edb20b3..b98d3bf 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
@@ -25,7 +25,6 @@ import java.nio.charset.StandardCharsets;
import org.junit.Assert;
import org.junit.Test;
-import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator;
import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSDictionary;
@@ -62,9 +61,8 @@ public class POSTaggerFactoryTest {
DummyPOSDictionary posDict = new DummyPOSDictionary(
POSDictionary.create(POSDictionaryTest.class
.getResourceAsStream("TagDictionaryCaseSensitive.xml")));
- Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
- POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(dic, posDict));
+ POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(posDict));
POSTaggerFactory factory = posModel.getFactory();
Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
@@ -81,22 +79,18 @@ public class POSTaggerFactoryTest {
Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
- Assert.assertTrue(factory.getDictionary() != null);
}
@Test
public void testPOSTaggerWithDefaultFactory() throws IOException {
POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
.getResourceAsStream("TagDictionaryCaseSensitive.xml"));
- Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
-
- POSModel posModel = trainPOSModel(new POSTaggerFactory(dic, posDict));
+ POSModel posModel = trainPOSModel(new POSTaggerFactory(null, null, posDict));
POSTaggerFactory factory = posModel.getFactory();
Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
Assert.assertTrue(factory.getPOSContextGenerator() != null);
Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
- Assert.assertTrue(factory.getDictionary() != null);
ByteArrayOutputStream out = new ByteArrayOutputStream();
posModel.serialize(out);
@@ -108,7 +102,6 @@ public class POSTaggerFactoryTest {
Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
Assert.assertTrue(factory.getPOSContextGenerator() != null);
Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
- Assert.assertTrue(factory.getDictionary() != null);
}
@Test(expected = InvalidFormatException.class)
[11/24] opennlp git commit: OPENNLP-995: Add a PR Review Template for
contributors
Posted by co...@apache.org.
OPENNLP-995: Add a PR Review Template for contributors
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/decaab59
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/decaab59
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/decaab59
Branch: refs/heads/OPENNLP-778
Commit: decaab59b5f75c478efe3add62373c9781f4df12
Parents: 413c7fc
Author: smarthi <sm...@apache.org>
Authored: Thu Feb 23 07:46:17 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Thu Feb 23 07:46:17 2017 -0500
----------------------------------------------------------------------
.github/CONTRIBUTING.md | 11 +++++++++++
.github/PULL_REQUEST_TEMPLATE.md | 27 +++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/decaab59/.github/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..577eb16
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# How to contribute to Apache OpenNLP
+
+Thank you for your intention to contribute to the Apache OpenNLP project. As an open-source community, we highly appreciate external contributions to our project.
+
+To make the process smooth for the project *committers* (those who review and accept changes) and *contributors* (those who propose new changes via pull requests), there are a few rules to follow.
+
+## Contribution Guidelines
+
+Please check out the [How to get involved](http://opennlp.apache.org/get-involved.html) to understand how contributions are made.
+A detailed list of coding standards can be found at [Apache OpenNLP Code Conventions](http://opennlp.apache.org/code-conventions.html) which also contains a list of coding guidelines that you should follow.
+For pull requests, there is a [check list](PULL_REQUEST_TEMPLATE.md) with criteria for acceptable contributions.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/decaab59/.github/PULL_REQUEST_TEMPLATE.md
----------------------------------------------------------------------
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..579e2e0
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,27 @@
+Thank you for contributing to Apache OpenNLP.
+
+In order to streamline the review of the contribution we ask you
+to ensure the following steps have been taken:
+
+### For all changes:
+- [ ] Is there a JIRA ticket associated with this PR? Is it referenced
+ in the commit message?
+
+- [ ] Does your PR title start with OPENNLP-XXXX where XXXX is the JIRA number you are trying to resolve? Pay particular attention to the hyphen "-" character.
+
+- [ ] Has your PR been rebased against the latest commit within the target branch (typically master)?
+
+- [ ] Is your initial contribution a single, squashed commit?
+
+### For code changes:
+- [ ] Have you ensured that the full suite of tests is executed via mvn clean install at the root opennlp folder?
+- [ ] Have you written or updated unit tests to verify your changes?
+- [ ] If adding new dependencies to the code, are these dependencies licensed in a way that is compatible for inclusion under [ASF 2.0](http://www.apache.org/legal/resolved.html#category-a)?
+- [ ] If applicable, have you updated the LICENSE file, including the main LICENSE file in opennlp folder?
+- [ ] If applicable, have you updated the NOTICE file, including the main NOTICE file found in opennlp folder?
+
+### For documentation related changes:
+- [ ] Have you ensured that format looks appropriate for the output in which it is rendered?
+
+### Note:
+Please ensure that once the PR is submitted, you check travis-ci for build issues and submit an update to your PR as soon as possible.
[08/24] opennlp git commit: OPENNLP-229: Add test for
NameFinderSequenceValidator
Posted by co...@apache.org.
OPENNLP-229: Add test for NameFinderSequenceValidator
This closes #125
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/c5a15b2b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/c5a15b2b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/c5a15b2b
Branch: refs/heads/OPENNLP-778
Commit: c5a15b2b66b6b4f018589d8f60cc7acc71861822
Parents: 8e610f1
Author: Peter Thygesen <pe...@gmail.com>
Authored: Wed Feb 15 21:12:48 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Feb 17 14:43:07 2017 +0100
----------------------------------------------------------------------
.../NameFinderSequenceValidatorTest.java | 186 +++++++++++++++++++
1 file changed, 186 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c5a15b2b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
new file mode 100644
index 0000000..35752c1
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.namefind;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * This is the test class for {@link NameFinderSequenceValidator}..
+ */
+public class NameFinderSequenceValidatorTest {
+
+ private static NameFinderSequenceValidator validator = new NameFinderSequenceValidator();
+ private static String START_A = "TypeA-" + NameFinderME.START;
+ private static String CONTINUE_A = "TypeA-" + NameFinderME.CONTINUE;
+ private static String START_B = "TypeB-" + NameFinderME.START;
+ private static String CONTINUE_B = "TypeB-" + NameFinderME.CONTINUE;
+ private static String OTHER = NameFinderME.OTHER;
+
+ @Test
+ public void testContinueCannotBeFirstOutcome() {
+
+ final String outcome = CONTINUE_A;
+
+ String[] inputSequence = new String[] {"PersonA", "is", "here"};
+ String[] outcomesSequence = new String[] {};
+ Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+ }
+
+ @Test
+ public void testContinueAfterStartAndSameType() {
+
+ final String outcome = CONTINUE_A;
+
+ // previous start, same name type
+ String[] inputSequence = new String[] {"Stefanie", "Schmidt", "is", "German"};
+ String[] outcomesSequence = new String[] {START_A};
+ Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+ }
+
+ @Ignore
+ @Test
+ public void testContinueAfterStartAndNotSameType() {
+
+ final String outcome = CONTINUE_B;
+
+ // previous start, not same name type
+ String[] inputSequence = new String[] {"PersonA", "LocationA", "something"};
+ String[] outcomesSequence = new String[] {START_A};
+ Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+ }
+
+ @Test
+ public void testContinueAfterContinueAndSameType() {
+
+ final String outcome = CONTINUE_A;
+
+ // previous continue, same name type
+ String[] inputSequence = new String[] {"FirstName", "MidleName", "LastName", "is", "a", "long", "name"};
+ String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+ }
+
+ @Test
+ public void testContinueAfterContinueAndNotSameType() {
+
+ final String outcome = CONTINUE_B;
+
+ // previous continue, not same name type
+ String[] inputSequence = new String[] {"FirstName", "LastName", "LocationA", "something"};
+ String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+ Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+ }
+
+ @Test
+ public void testContinueAfterOther() {
+
+ final String outcome = CONTINUE_A;
+
+ // previous other
+ String[] inputSequence = new String[] {"something", "is", "wrong", "here"};
+ String[] outcomesSequence = new String[] {OTHER};
+ Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+ }
+
+ @Test
+ public void testStartIsAlwaysAValidOutcome() {
+
+ final String outcome = START_A;
+
+ // pos zero
+ String[] inputSequence = new String[] {"PersonA", "is", "here"};
+ String[] outcomesSequence = new String[] {};
+ Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+ // pos one, previous other
+ inputSequence = new String[] {"it's", "PersonA", "again"};
+ outcomesSequence = new String[] {OTHER};
+ Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+ // pos one, previous start
+ inputSequence = new String[] {"PersonA", "PersonB", "something"};
+ outcomesSequence = new String[] {START_A};
+ Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous other
+ inputSequence = new String[] {"here", "is", "PersonA"};
+ outcomesSequence = new String[] {OTHER, OTHER};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous start, same name type
+ inputSequence = new String[] {"is", "PersonA", "PersoneB"};
+ outcomesSequence = new String[] {OTHER, START_A};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous start, different name type
+ inputSequence = new String[] {"something", "PersonA", "OrganizationA"};
+ outcomesSequence = new String[] {OTHER, START_B};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous continue, same name type
+ inputSequence = new String[] {"Stefanie", "Schmidt", "PersonB", "something"};
+ outcomesSequence = new String[] {START_A, CONTINUE_A};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous continue, not same name type
+ inputSequence = new String[] {"Stefanie", "Schmidt", "OrganizationA", "something"};
+ outcomesSequence = new String[] {START_B, CONTINUE_B};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ }
+
+ @Test
+ public void testOtherIsAlwaysAValidOutcome() {
+
+ final String outcome = OTHER;
+
+ // pos zero
+ String[] inputSequence = new String[] {"it's", "a", "test"};
+ String[] outcomesSequence = new String[] {};
+ Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+ // pos one, previous other
+ inputSequence = new String[] {"it's", "a", "test"};
+ outcomesSequence = new String[] {OTHER};
+ Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+ // pos one, previous start
+ inputSequence = new String[] {"Mike", "is", "here"};
+ outcomesSequence = new String[] {START_A};
+ Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous other
+ inputSequence = new String[] {"it's", "a", "test"};
+ outcomesSequence = new String[] {OTHER, OTHER};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous start
+ inputSequence = new String[] {"is", "Mike", "here"};
+ outcomesSequence = new String[] {OTHER, START_A};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+ // pos two, previous continue
+ inputSequence = new String[] {"Stefanie", "Schmidt", "lives", "at", "home"};
+ outcomesSequence = new String[] {START_A, CONTINUE_A};
+ Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+ }
+
+}
[21/24] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class
Posted by co...@apache.org.
OPENNLP-1002 Remove deprecated GIS class
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/efa25767
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/efa25767
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/efa25767
Branch: refs/heads/OPENNLP-778
Commit: efa257676280abd316bb677e5a8de5cb9fe1dd73
Parents: 530432c
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Mar 10 17:13:36 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Mar 10 17:13:36 2017 +0100
----------------------------------------------------------------------
.../main/java/opennlp/tools/ml/maxent/GIS.java | 303 -------------------
1 file changed, 303 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/efa25767/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
deleted file mode 100644
index 97c214d..0000000
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.ml.maxent;
-
-import java.io.IOException;
-
-import opennlp.tools.ml.AbstractEventTrainer;
-import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.ml.model.Prior;
-import opennlp.tools.ml.model.UniformPrior;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * A Factory class which uses instances of GISTrainer to create and train
- * GISModels.
- * @deprecated use {@link GISTrainer}
- */
-@Deprecated
-public class GIS extends AbstractEventTrainer {
-
- public static final String MAXENT_VALUE = "MAXENT";
-
- /**
- * Set this to false if you don't want messages about the progress of model
- * training displayed. Alternately, you can use the overloaded version of
- * trainModel() to conditionally enable progress messages.
- */
- public static boolean PRINT_MESSAGES = true;
-
- /**
- * If we are using smoothing, this is used as the "number" of times we want
- * the trainer to imagine that it saw a feature that it actually didn't see.
- * Defaulted to 0.1.
- */
- private static final double SMOOTHING_OBSERVATION = 0.1;
-
- private static final String SMOOTHING_PARAM = "smoothing";
- private static final boolean SMOOTHING_DEFAULT = false;
-
- public GIS() {
- }
-
- public GIS(TrainingParameters parameters) {
- super(parameters);
- }
-
- public boolean isValid() {
-
- if (!super.isValid()) {
- return false;
- }
-
- String algorithmName = getAlgorithm();
-
- return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
- }
-
- public boolean isSortAndMerge() {
- return true;
- }
-
- public AbstractModel doTrain(DataIndexer indexer) throws IOException {
- int iterations = getIterations();
-
- AbstractModel model;
-
- boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
- boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
- int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
-
- model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
-
- return model;
- }
-
- // << members related to AbstractEventTrainer
-
- /**
- * Train a model using the GIS algorithm, assuming 100 iterations and no
- * cutoff.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
- return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm, assuming 100 iterations and no
- * cutoff.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
- throws IOException {
- return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff) throws IOException {
- return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
- throws IOException {
- GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
- trainer.setSmoothing(smoothing);
- trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
- return trainer.trainModel(eventStream, iterations, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @param sigma
- * The standard deviation for the gaussian smoother.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff, double sigma) throws IOException {
- GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
- if (sigma > 0) {
- trainer.setGaussianSigma(sigma);
- }
- return trainer.trainModel(eventStream, iterations, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
- return trainModel(iterations, indexer, true, smoothing, null, 1);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer) {
- return trainModel(iterations, indexer, true, false, null, 1);
- }
-
- /**
- * Train a model using the GIS algorithm with the specified number of
- * iterations, data indexer, and prior.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- Prior modelPrior, int cutoff) {
- return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- boolean printMessagesWhileTraining, boolean smoothing,
- Prior modelPrior) {
- return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- boolean printMessagesWhileTraining, boolean smoothing,
- Prior modelPrior, int threads) {
- GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
- trainer.setSmoothing(smoothing);
- trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
- if (modelPrior == null) {
- modelPrior = new UniformPrior();
- }
- return trainer.trainModel(iterations, indexer, modelPrior, threads);
- }
-}
-
-
-
[05/24] opennlp git commit: OPENNLP-990 Fix all array style
violations and add a checkstyle rule
Posted by co...@apache.org.
OPENNLP-990 Fix all array style violations and add a checkstyle rule
This closes #127
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bbd6d3fc
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bbd6d3fc
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bbd6d3fc
Branch: refs/heads/OPENNLP-778
Commit: bbd6d3fc4178d9209e3e53868ce4ec9b74679f3b
Parents: b7d3abc
Author: Peter Thygesen <pe...@gmail.com>
Authored: Thu Feb 16 12:48:12 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Feb 16 13:03:36 2017 +0100
----------------------------------------------------------------------
checkstyle.xml | 1 +
.../opennlp/bratann/NameFinderAnnService.java | 2 +-
.../opennlp/bratann/NameFinderResource.java | 6 ++---
.../java/opennlp/morfologik/cmdline/CLI.java | 2 +-
.../chunker/ChunkSampleSequenceStream.java | 4 +--
.../java/opennlp/tools/chunker/Chunker.java | 4 +--
.../tools/cmdline/AbstractConverterTool.java | 2 +-
.../opennlp/tools/cmdline/ArgumentParser.java | 16 +++++------
.../opennlp/tools/cmdline/BasicCmdLineTool.java | 2 +-
.../main/java/opennlp/tools/cmdline/CLI.java | 2 +-
.../java/opennlp/tools/cmdline/CmdLineUtil.java | 10 +++----
.../tools/cmdline/EvaluationErrorPrinter.java | 10 +++----
.../cmdline/FineGrainedReportListener.java | 2 +-
.../tools/cmdline/ObjectStreamFactory.java | 2 +-
.../opennlp/tools/cmdline/TypedCmdLineTool.java | 2 +-
.../tools/cmdline/doccat/DoccatTool.java | 2 +-
.../cmdline/entitylinker/EntityLinkerTool.java | 2 +-
.../TokenNameFinderCrossValidatorTool.java | 4 +--
.../namefind/TokenNameFinderEvaluatorTool.java | 2 +-
.../cmdline/namefind/TokenNameFinderTool.java | 6 ++---
.../namefind/TokenNameFinderTrainerTool.java | 8 +++---
.../tools/cmdline/postag/POSTaggerTool.java | 2 +-
.../tokenizer/DictionaryDetokenizerTool.java | 2 +-
.../opennlp/tools/dictionary/Dictionary.java | 2 +-
.../tools/doccat/DocumentCategorizer.java | 6 ++---
.../DocumentCategorizerContextGenerator.java | 2 +-
.../doccat/DocumentCategorizerEvaluator.java | 4 +--
.../tools/doccat/DocumentCategorizerME.java | 4 +--
.../opennlp/tools/doccat/DocumentSample.java | 4 +--
.../tools/doccat/DocumentSampleStream.java | 4 +--
.../formats/BioNLP2004NameSampleStream.java | 2 +-
.../tools/formats/Conll02NameSampleStream.java | 2 +-
.../tools/formats/Conll03NameSampleStream.java | 2 +-
.../tools/formats/ConllXPOSSampleStream.java | 2 +-
.../tools/formats/DirectorySampleStream.java | 4 +--
.../tools/formats/EvalitaNameSampleStream.java | 2 +-
.../formats/LeipzigDoccatSampleStream.java | 2 +-
.../LeipzigDocumentSampleStreamFactory.java | 4 +--
.../formats/brat/BratAnnotationStream.java | 10 +++----
.../tools/formats/brat/BratDocument.java | 2 +-
.../formats/brat/BratNameSampleStream.java | 2 +-
.../convert/FileToByteArraySampleStream.java | 2 +-
.../convert/FileToStringSampleStream.java | 2 +-
.../formats/muc/MucNameContentHandler.java | 2 +-
.../ontonotes/OntoNotesNameSampleStream.java | 2 +-
.../lemmatizer/LemmaSampleSequenceStream.java | 6 ++---
.../opennlp/tools/lemmatizer/Lemmatizer.java | 2 +-
.../main/java/opennlp/tools/ml/BeamSearch.java | 4 +--
.../java/opennlp/tools/ml/maxent/GISModel.java | 2 +-
.../opennlp/tools/ml/model/MaxentModel.java | 2 +-
.../SimplePerceptronSequenceTrainer.java | 4 +--
.../java/opennlp/tools/namefind/BioCodec.java | 2 +-
.../namefind/DefaultNameContextGenerator.java | 4 +--
.../tools/namefind/DictionaryNameFinder.java | 2 +-
.../tools/namefind/NameFinderEventStream.java | 2 +-
.../opennlp/tools/namefind/NameFinderME.java | 2 +-
.../java/opennlp/tools/namefind/NameSample.java | 2 +-
.../namefind/NameSampleSequenceStream.java | 4 +--
.../opennlp/tools/namefind/RegexNameFinder.java | 8 +++---
.../opennlp/tools/namefind/TokenNameFinder.java | 2 +-
.../namefind/TokenNameFinderCrossValidator.java | 4 +--
.../namefind/TokenNameFinderEvaluator.java | 4 +--
.../tools/namefind/TokenNameFinderFactory.java | 2 +-
.../tools/namefind/TokenNameFinderModel.java | 2 +-
.../tools/parser/AbstractBottomUpParser.java | 6 ++---
.../tools/parser/ChunkContextGenerator.java | 2 +-
.../tools/parser/ParserChunkerFactory.java | 2 +-
.../parser/ParserChunkerSequenceValidator.java | 2 +-
.../opennlp/tools/parser/PosSampleStream.java | 4 +--
.../opennlp/tools/postag/POSDictionary.java | 2 +-
.../java/opennlp/tools/postag/POSEvaluator.java | 4 +--
.../java/opennlp/tools/postag/POSSample.java | 10 +++----
.../tools/postag/POSSampleEventStream.java | 6 ++---
.../tools/postag/POSSampleSequenceStream.java | 4 +--
.../sentdetect/DefaultEndOfSentenceScanner.java | 4 +--
.../sentdetect/SentenceDetectorEvaluator.java | 6 ++---
.../tools/sentdetect/SentenceDetectorME.java | 2 +-
.../tools/sentdetect/SentenceSample.java | 2 +-
.../tokenize/DetokenizationDictionary.java | 4 +--
.../opennlp/tools/tokenize/Detokenizer.java | 4 +--
.../tools/tokenize/DictionaryDetokenizer.java | 6 ++---
.../tools/tokenize/TokSpanEventStream.java | 2 +-
.../opennlp/tools/tokenize/TokenSample.java | 6 ++---
.../tools/tokenize/TokenizerEvaluator.java | 2 +-
.../opennlp/tools/tokenize/TokenizerStream.java | 2 +-
.../java/opennlp/tools/util/SequenceCodec.java | 2 +-
.../java/opennlp/tools/util/StringList.java | 2 +-
.../java/opennlp/tools/util/StringUtil.java | 4 +--
.../DocumentBeginFeatureGenerator.java | 2 +-
.../tools/util/featuregen/InSpanGenerator.java | 4 +--
.../util/featuregen/WordClusterDictionary.java | 2 +-
.../opennlp/tools/util/model/ModelUtil.java | 2 +-
.../opennlp/tools/chunker/ChunkerMETest.java | 2 +-
.../tools/cmdline/ArgumentParserTest.java | 6 ++---
.../tools/doccat/DocumentCategorizerMETest.java | 4 +--
.../tools/doccat/DocumentCategorizerNBTest.java | 4 +--
.../formats/ConllXPOSSampleStreamTest.java | 8 +++---
.../ConstitParseSampleStreamTest.java | 4 +--
.../java/opennlp/tools/ml/BeamSearchTest.java | 22 +++++++--------
.../namefind/DictionaryNameFinderTest.java | 28 ++++++++++----------
.../tools/namefind/NameFinderMETest.java | 12 ++++-----
.../opennlp/tools/namefind/NameSampleTest.java | 2 +-
.../tools/namefind/RegexNameFinderTest.java | 6 ++---
.../java/opennlp/tools/parser/ParseTest.java | 2 +-
.../opennlp/tools/postag/POSTaggerMETest.java | 2 +-
.../tools/postag/WordTagSampleStreamTest.java | 4 +--
.../sentdetect/NewlineSentenceDetectorTest.java | 2 +-
.../sentdetect/SentenceDetectorMETest.java | 2 +-
.../tokenize/DictionaryDetokenizerTest.java | 10 +++----
.../tools/tokenize/TokenSampleStreamTest.java | 6 ++---
.../opennlp/tools/tokenize/TokenSampleTest.java | 2 +-
.../opennlp/tools/tokenize/TokenizerMETest.java | 4 +--
.../opennlp/tools/util/eval/FMeasureTest.java | 10 +++----
.../featuregen/CachedFeatureGeneratorTest.java | 6 ++---
.../PreviousMapFeatureGeneratorTest.java | 2 +-
.../main/java/opennlp/uima/chunker/Chunker.java | 10 +++----
.../doccat/AbstractDocumentCategorizer.java | 2 +-
.../uima/namefind/AbstractNameFinder.java | 4 +--
.../java/opennlp/uima/namefind/NameFinder.java | 6 ++---
.../opennlp/uima/normalizer/NumberUtil.java | 2 +-
.../main/java/opennlp/uima/parser/Parser.java | 8 +++---
.../java/opennlp/uima/postag/POSTagger.java | 2 +-
.../sentdetect/AbstractSentenceDetector.java | 4 +--
.../uima/sentdetect/SentenceDetector.java | 4 +--
.../uima/tokenize/AbstractTokenizer.java | 8 +++---
.../java/opennlp/uima/tokenize/Tokenizer.java | 2 +-
.../java/opennlp/uima/util/OpennlpUtil.java | 2 +-
127 files changed, 264 insertions(+), 263 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/checkstyle.xml
----------------------------------------------------------------------
diff --git a/checkstyle.xml b/checkstyle.xml
index ab65feb..1bfe788 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -126,5 +126,6 @@
value="STANDARD_JAVA_PACKAGE###THIRD_PARTY_PACKAGE###SPECIAL_IMPORTS###STATIC"/>
</module>
<module name="EqualsHashCode"/>
+ <module name="ArrayTypeStyle"/>
</module>
</module>
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
index 5519866..a6085e7 100644
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
@@ -44,7 +44,7 @@ public class NameFinderAnnService {
public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();
public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
- public static TokenNameFinder nameFinders[];
+ public static TokenNameFinder[] nameFinders;
public static void main(String[] args) throws Exception {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
index bd19bca..468f898 100644
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
@@ -39,7 +39,7 @@ public class NameFinderResource {
private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
- private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
+ private TokenNameFinder[] nameFinders = NameFinderAnnService.nameFinders;
private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, int endOffset) {
for (int i = beginOffset; i < endOffset; i++) {
@@ -66,10 +66,10 @@ public class NameFinderResource {
// offset of sentence gets lost here!
Span[] tokenSpans = tokenizer.tokenizePos(sentenceText);
- String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
+ String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
for (TokenNameFinder nameFinder : nameFinders) {
- Span names[] = nameFinder.find(tokens);
+ Span[] names = nameFinder.find(tokens);
for (Span name : names) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
index d659435..664c03a 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -101,7 +101,7 @@ public final class CLI {
System.exit(0);
}
- String toolArguments[] = new String[args.length - 1];
+ String[] toolArguments = new String[args.length - 1];
System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
String toolName = args[0];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
index 9898bd4..eb42aa9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
@@ -41,8 +41,8 @@ public class ChunkSampleSequenceStream implements SequenceStream {
ChunkSample sample = samples.read();
if (sample != null) {
- String sentence[] = sample.getSentence();
- String tags[] = sample.getTags();
+ String[] sentence = sample.getSentence();
+ String[] tags = sample.getTags();
Event[] events = new Event[sentence.length];
for (int i = 0; i < sentence.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
index c496d12..5bdec75 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
@@ -33,7 +33,7 @@ public interface Chunker {
*
* @return an array of chunk tags for each token in the sequence.
*/
- String[] chunk(String[] toks, String tags[]);
+ String[] chunk(String[] toks, String[] tags);
/**
* Generates tagged chunk spans for the given sequence returning the result in a span array.
@@ -43,7 +43,7 @@ public interface Chunker {
*
* @return an array of spans with chunk tags for each chunk in the sequence.
*/
- Span[] chunkAsSpans(String[] toks, String tags[]);
+ Span[] chunkAsSpans(String[] toks, String[] tags);
/**
* Returns the top k chunk sequences for the specified sentence with the specified pos-tags
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
index a6b81ea..4c95b75 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
@@ -89,7 +89,7 @@ public abstract class AbstractConverterTool<T> extends TypedCmdLineTool<T> {
format = args[0];
ObjectStreamFactory<T> streamFactory = getStreamFactory(format);
- String formatArgs[] = new String[args.length - 1];
+ String[] formatArgs = new String[args.length - 1];
System.arraycopy(args, 1, formatArgs, 0, formatArgs.length);
String helpString = createHelpString(format, ArgumentParser.createUsage(streamFactory.getParameters()));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
index 631bc34..8243560 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
@@ -169,7 +169,7 @@ public class ArgumentParser {
// all checks should also be performed for super interfaces
- Method methods[] = proxyInterface.getMethods();
+ Method[] methods = proxyInterface.getMethods();
if (methods.length == 0)
throw new IllegalArgumentException("proxy interface must at least declare one method!");
@@ -204,7 +204,7 @@ public class ArgumentParser {
private static String methodNameToParameter(String methodName) {
// remove get from method name
- char parameterNameChars[] = methodName.toCharArray();
+ char[] parameterNameChars = methodName.toCharArray();
// name length is checked to be at least 4 prior
parameterNameChars[3] = Character.toLowerCase(parameterNameChars[3]);
@@ -385,7 +385,7 @@ public class ArgumentParser {
* @return true, if arguments are valid
*/
@SuppressWarnings({"unchecked"})
- public static <T> boolean validateArguments(String args[], Class<T> argProxyInterface) {
+ public static <T> boolean validateArguments(String[] args, Class<T> argProxyInterface) {
return validateArguments(args, new Class[]{argProxyInterface});
}
@@ -398,7 +398,7 @@ public class ArgumentParser {
* @param argProxyInterfaces interfaces with parameters description
* @return true, if arguments are valid
*/
- public static boolean validateArguments(String args[], Class<?>... argProxyInterfaces) {
+ public static boolean validateArguments(String[] args, Class<?>... argProxyInterfaces) {
return null == validateArgumentsLoudly(args, argProxyInterfaces);
}
@@ -409,7 +409,7 @@ public class ArgumentParser {
* @param argProxyInterface interface with parameters description
* @return null, if arguments are valid or error message otherwise
*/
- public static String validateArgumentsLoudly(String args[], Class<?> argProxyInterface) {
+ public static String validateArgumentsLoudly(String[] args, Class<?> argProxyInterface) {
return validateArgumentsLoudly(args, new Class[]{argProxyInterface});
}
@@ -420,7 +420,7 @@ public class ArgumentParser {
* @param argProxyInterfaces interfaces with parameters description
* @return null, if arguments are valid or error message otherwise
*/
- public static String validateArgumentsLoudly(String args[], Class<?>... argProxyInterfaces) {
+ public static String validateArgumentsLoudly(String[] args, Class<?>... argProxyInterfaces) {
// number of parameters must be always be even
if (args.length % 2 != 0) {
return "Number of parameters must be always be even";
@@ -478,7 +478,7 @@ public class ArgumentParser {
* if the proxy interface is not compatible.
*/
@SuppressWarnings("unchecked")
- public static <T> T parse(String args[], Class<T> argProxyInterface) {
+ public static <T> T parse(String[] args, Class<T> argProxyInterface) {
checkProxyInterfaces(argProxyInterface);
@@ -533,7 +533,7 @@ public class ArgumentParser {
* @param <T> T
* @return arguments pertaining to argProxyInterface
*/
- public static <T> String[] filter(String args[], Class<T> argProxyInterface) {
+ public static <T> String[] filter(String[] args, Class<T> argProxyInterface) {
ArrayList<String> parameters = new ArrayList<>(args.length);
for (Method method : argProxyInterface.getMethods()) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
index abe73b4..f320986 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
@@ -29,5 +29,5 @@ public abstract class BasicCmdLineTool extends CmdLineTool {
*
* @param args arguments
*/
- public abstract void run(String args[]);
+ public abstract void run(String[] args);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
index 9385a18..b575f71 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -210,7 +210,7 @@ public final class CLI {
}
final long startTime = System.currentTimeMillis();
- String toolArguments[] = new String[args.length - 1];
+ String[] toolArguments = new String[args.length - 1];
System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
String toolName = args[0];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
index 7ea2a0b..1dfd7bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
@@ -204,7 +204,7 @@ public final class CmdLineUtil {
* @param args arguments
* @return the index of the parameter in the arguments, or -1 if the parameter is not found
*/
- public static int getParameterIndex(String param, String args[]) {
+ public static int getParameterIndex(String param, String[] args) {
for (int i = 0; i < args.length; i++) {
if (args[i].startsWith("-") && args[i].equals(param)) {
return i;
@@ -221,7 +221,7 @@ public final class CmdLineUtil {
* @param args arguments
* @return parameter value
*/
- public static String getParameter(String param, String args[]) {
+ public static String getParameter(String param, String[] args) {
int i = getParameterIndex(param, args);
if (-1 < i) {
i++;
@@ -240,7 +240,7 @@ public final class CmdLineUtil {
* @param args arguments
* @return parameter value
*/
- public static Integer getIntParameter(String param, String args[]) {
+ public static Integer getIntParameter(String param, String[] args) {
String value = getParameter(param, args);
try {
@@ -261,7 +261,7 @@ public final class CmdLineUtil {
* @param args arguments
* @return parameter value
*/
- public static Double getDoubleParameter(String param, String args[]) {
+ public static Double getDoubleParameter(String param, String[] args) {
String value = getParameter(param, args);
try {
@@ -286,7 +286,7 @@ public final class CmdLineUtil {
}
}
- public static boolean containsParam(String param, String args[]) {
+ public static boolean containsParam(String param, String[] args) {
for (String arg : args) {
if (arg.equals(param)) {
return true;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
index f8a0d91..8ae25e6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
@@ -38,7 +38,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
}
// for the sentence detector
- protected void printError(Span references[], Span predictions[],
+ protected void printError(Span[] references, Span[] predictions,
T referenceSample, T predictedSample, String sentence) {
List<Span> falseNegatives = new ArrayList<>();
List<Span> falsePositives = new ArrayList<>();
@@ -55,7 +55,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
}
// for namefinder, chunker...
- protected void printError(String id, Span references[], Span predictions[],
+ protected void printError(String id, Span[] references, Span[] predictions,
T referenceSample, T predictedSample, String[] sentenceTokens) {
List<Span> falseNegatives = new ArrayList<>();
List<Span> falsePositives = new ArrayList<>();
@@ -75,13 +75,13 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
}
}
- protected void printError(Span references[], Span predictions[],
+ protected void printError(Span[] references, Span[] predictions,
T referenceSample, T predictedSample, String[] sentenceTokens) {
printError(null, references, predictions, referenceSample, predictedSample, sentenceTokens);
}
// for pos tagger
- protected void printError(String references[], String predictions[],
+ protected void printError(String[] references, String[] predictions,
T referenceSample, T predictedSample, String[] sentenceTokens) {
List<String> filteredDoc = new ArrayList<>();
List<String> filteredRefs = new ArrayList<>();
@@ -213,7 +213,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
* @param falsePositives
* [out] the false positives list
*/
- private void findErrors(Span references[], Span predictions[],
+ private void findErrors(Span[] references, Span[] predictions,
List<Span> falseNegatives, List<Span> falsePositives) {
falseNegatives.addAll(Arrays.asList(references));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
index 03ce489..714561a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
@@ -60,7 +60,7 @@ public abstract class FineGrainedReportListener {
private static String generateAlphaLabel(int index) {
- char labelChars[] = new char[3];
+ char[] labelChars = new char[3];
int i;
for (i = 2; i >= 0; i--) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
index 8bc6b95..4f48bbf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
@@ -36,5 +36,5 @@ public interface ObjectStreamFactory<T> {
* @param args arguments
* @return ObjectStream instance
*/
- ObjectStream<T> create(String args[]);
+ ObjectStream<T> create(String[] args);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
index bf4b381..85ab2cb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
@@ -116,7 +116,7 @@ public abstract class TypedCmdLineTool<T>
* @param format format to work with
* @param args command line arguments
*/
- public abstract void run(String format, String args[]);
+ public abstract void run(String format, String[] args);
/**
* Retrieves a description on how to use the tool.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
index 49a55d3..a01d354 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
@@ -70,7 +70,7 @@ public class DoccatTool extends BasicCmdLineTool {
while ((document = documentStream.read()) != null) {
String[] tokens = model.getFactory().getTokenizer().tokenize(document);
- double prob[] = doccat.categorize(tokens);
+ double[] prob = doccat.categorize(tokens);
String category = doccat.getBestCategory(prob);
DocumentSample sample = new DocumentSample(category, tokens);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
index f248a2c..7f2d334 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
@@ -93,7 +93,7 @@ public class EntityLinkerTool extends BasicCmdLineTool {
// Run entity linker ... and output result ...
StringBuilder text = new StringBuilder();
- Span sentences[] = new Span[document.size()];
+ Span[] sentences = new Span[document.size()];
Span[][] tokensBySentence = new Span[document.size()][];
Span[][] namesBySentence = new Span[document.size()][];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 153d6f7..0ee3738 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -68,14 +68,14 @@ public final class TokenNameFinderCrossValidatorTool
mlParams = new TrainingParameters();
}
- byte featureGeneratorBytes[] =
+ byte[] featureGeneratorBytes =
TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
Map<String, Object> resources =
TokenNameFinderTrainerTool.loadResources(params.getResources(), params.getFeaturegen());
if (params.getNameTypes() != null) {
- String nameTypes[] = params.getNameTypes().split(",");
+ String[] nameTypes = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
index d00e254..b3d5bba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
@@ -96,7 +96,7 @@ public final class TokenNameFinderEvaluatorTool
}
if (params.getNameTypes() != null) {
- String nameTypes[] = params.getNameTypes().split(",");
+ String[] nameTypes = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 2476005..59b2f3a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -53,7 +53,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
System.out.println(getHelp());
} else {
- NameFinderME nameFinders[] = new NameFinderME[args.length];
+ NameFinderME[] nameFinders = new NameFinderME[args.length];
for (int i = 0; i < nameFinders.length; i++) {
TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
@@ -71,7 +71,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
String line;
while ((line = untokenizedLineStream.read()) != null) {
- String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+ String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
// A new line indicates a new document,
// adaptive data must be cleared for a new document
@@ -90,7 +90,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
// Simple way to drop intersecting spans, otherwise the
// NameSample is invalid
- Span reducedNames[] = NameFinderME.dropOverlappingSpans(
+ Span[] reducedNames = NameFinderME.dropOverlappingSpans(
names.toArray(new Span[names.size()]));
NameSample nameSample = new NameSample(whitespaceTokenizerLine,
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index fb73506..5bb18d2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -68,7 +68,7 @@ public final class TokenNameFinderTrainerTool
}
static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
- byte featureGeneratorBytes[] = null;
+ byte[] featureGeneratorBytes = null;
// load descriptor file into memory
if (featureGenDescriptorFile != null) {
@@ -118,7 +118,7 @@ public final class TokenNameFinderTrainerTool
}
}
- File resourceFiles[] = resourcePath.listFiles();
+ File[] resourceFiles = resourcePath.listFiles();
for (File resourceFile : resourceFiles) {
String resourceName = resourceFile.getName();
@@ -172,7 +172,7 @@ public final class TokenNameFinderTrainerTool
File modelOutFile = params.getModel();
- byte featureGeneratorBytes[] = openFeatureGeneratorBytes(params.getFeaturegen());
+ byte[] featureGeneratorBytes = openFeatureGeneratorBytes(params.getFeaturegen());
// TODO: Support Custom resources:
@@ -184,7 +184,7 @@ public final class TokenNameFinderTrainerTool
CmdLineUtil.checkOutputFile("name finder model", modelOutFile);
if (params.getNameTypes() != null) {
- String nameTypes[] = params.getNameTypes().split(",");
+ String[] nameTypes = params.getNameTypes().split(",");
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
index dc93226..3f1959e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
@@ -63,7 +63,7 @@ public final class POSTaggerTool extends BasicCmdLineTool {
String line;
while ((line = lineStream.read()) != null) {
- String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+ String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
index 57176ae..30f408b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
@@ -58,7 +58,7 @@ public final class DictionaryDetokenizerTool extends BasicCmdLineTool {
while ((tokenizedLine = tokenizedLineStream.read()) != null) {
// white space tokenize line
- String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);
+ String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);
System.out.println(detokenizer.detokenize(tokens, null));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index 3fd8986..10b9f37 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -275,7 +275,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
while ((line = lineReader.readLine()) != null) {
StringTokenizer whiteSpaceTokenizer = new StringTokenizer(line, " ");
- String tokens[] = new String[whiteSpaceTokenizer.countTokens()];
+ String[] tokens = new String[whiteSpaceTokenizer.countTokens()];
if (tokens.length > 0) {
int tokenIndex = 0;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
index c8ad3c3..88bf8f9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
@@ -32,7 +32,7 @@ public interface DocumentCategorizer {
* @param text the tokens of text to categorize
* @return per category probabilities
*/
- double[] categorize(String text[]);
+ double[] categorize(String[] text);
/**
* Categorizes the given text, provided in separate tokens.
@@ -43,7 +43,7 @@ public interface DocumentCategorizer {
* @deprecated will be removed after 1.7.1 release. Don't use it.
*/
@Deprecated
- double[] categorize(String text[], Map<String, Object> extraInformation);
+ double[] categorize(String[] text, Map<String, Object> extraInformation);
/**
* get the best category from previously generated outcome probabilities
@@ -101,7 +101,7 @@ public interface DocumentCategorizer {
* @param results the probabilities of each category
* @return the name of the outcome
*/
- String getAllResults(double results[]);
+ String getAllResults(double[] results);
/**
* Returns a map in which the key is the category name and the value is the score
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
index b1da3e3..e12f16b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
@@ -32,7 +32,7 @@ class DocumentCategorizerContextGenerator {
mFeatureGenerators = featureGenerators;
}
- public String[] getContext(String text[], Map<String, Object> extraInformation) {
+ public String[] getContext(String[] text, Map<String, Object> extraInformation) {
Collection<String> context = new LinkedList<>();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
index d2307e3..63e0768 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
@@ -57,9 +57,9 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
*/
public DocumentSample processSample(DocumentSample sample) {
- String document[] = sample.getText();
+ String[] document = sample.getText();
- double probs[] = categorizer.categorize(document, sample.getExtraInformation());
+ double[] probs = categorizer.categorize(document, sample.getExtraInformation());
String cat = categorizer.getBestCategory(probs);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index 33151d9..e743b9d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -85,7 +85,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
* Categorizes the given text.
* @param text the text to categorize
*/
- public double[] categorize(String text[]) {
+ public double[] categorize(String[] text) {
return this.categorize(text, Collections.emptyMap());
}
@@ -225,7 +225,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
return model.getMaxentModel().getNumOutcomes();
}
- public String getAllResults(double results[]) {
+ public String getAllResults(double[] results) {
return model.getMaxentModel().getAllOutcomes(results);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
index f521738..3d107fa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
@@ -39,11 +39,11 @@ public class DocumentSample {
this(category, WhitespaceTokenizer.INSTANCE.tokenize(text));
}
- public DocumentSample(String category, String text[]) {
+ public DocumentSample(String category, String[] text) {
this(category, text, null);
}
- public DocumentSample(String category, String text[], Map<String, Object> extraInformation) {
+ public DocumentSample(String category, String[] text, Map<String, Object> extraInformation) {
Objects.requireNonNull(text, "text must not be null");
this.category = Objects.requireNonNull(category, "category must not be null");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
index 13d9184..9054eb7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
@@ -45,13 +45,13 @@ public class DocumentSampleStream extends FilterObjectStream<String, DocumentSam
if (sampleString != null) {
// Whitespace tokenize entire string
- String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
+ String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
DocumentSample sample;
if (tokens.length > 1) {
String category = tokens[0];
- String docTokens[] = new String[tokens.length - 1];
+ String[] docTokens = new String[tokens.length - 1];
System.arraycopy(tokens, 1, docTokens, 0, tokens.length - 1);
sample = new DocumentSample(category, docTokens);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
index b3ac623..ff4ad34 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
@@ -87,7 +87,7 @@ public class BioNLP2004NameSampleStream implements ObjectStream<NameSample> {
if (line.contains("ABSTRACT TRUNCATED"))
continue;
- String fields[] = line.split("\t");
+ String[] fields = line.split("\t");
if (fields.length == 2) {
sentence.add(fields[0]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
index cd68148..f3c2a81 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
@@ -125,7 +125,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> {
continue;
}
- String fields[] = line.split(" ");
+ String[] fields = line.split(" ");
if (fields.length == 3) {
sentence.add(fields[0]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
index 6892605..5f1c082 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
@@ -93,7 +93,7 @@ public class Conll03NameSampleStream implements ObjectStream<NameSample> {
continue;
}
- String fields[] = line.split(" ");
+ String[] fields = line.split(" ");
// For English: WORD POS-TAG SC-TAG NE-TAG
if (LANGUAGE.EN.equals(lang) && fields.length == 4) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
index 74ae62a..9525ab6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
@@ -76,7 +76,7 @@ public class ConllXPOSSampleStream extends FilterObjectStream<String, POSSample>
final int minNumberOfFields = 5;
- String parts[] = line.split("\t");
+ String[] parts = line.split("\t");
if (parts.length >= minNumberOfFields) {
tokens.add(parts[1]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
index 931cb55..3a5621a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
@@ -43,7 +43,7 @@ public class DirectorySampleStream implements ObjectStream<File> {
private Stack<File> textFiles = new Stack<>();
- public DirectorySampleStream(File dirs[], FileFilter fileFilter, boolean recursive) {
+ public DirectorySampleStream(File[] dirs, FileFilter fileFilter, boolean recursive) {
this.fileFilter = fileFilter;
isRecursiveScan = recursive;
@@ -73,7 +73,7 @@ public class DirectorySampleStream implements ObjectStream<File> {
while (textFiles.isEmpty() && !directories.isEmpty()) {
File dir = directories.pop();
- File files[];
+ File[] files;
if (fileFilter != null) {
files = dir.listFiles(fileFilter);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
index d09c0b3..531a50f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
@@ -138,7 +138,7 @@ public class EvalitaNameSampleStream implements ObjectStream<NameSample> {
continue;
}
- String fields[] = line.split(" ");
+ String[] fields = line.split(" ");
// For Italian: WORD POS-TAG SC-TAG NE-TAG
if (LANGUAGE.IT.equals(lang) && fields.length == 4) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
index 321f7c4..1ca0484 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
@@ -87,7 +87,7 @@ public class LeipzigDoccatSampleStream extends
String line;
while (count < sentencesPerDocument && (line = samples.read()) != null) {
- String tokens[] = tokenizer.tokenize(line);
+ String[] tokens = tokenizer.tokenize(line);
if (tokens.length == 0) {
throw new IOException("Empty lines are not allowed!");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
index f0aea5e..bd2453b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
@@ -51,7 +51,7 @@ public class LeipzigDocumentSampleStreamFactory
Parameters params = ArgumentParser.parse(args, Parameters.class);
File sentencesFileDir = params.getSentencesDir();
- File sentencesFiles[] = sentencesFileDir.listFiles(new FilenameFilter() {
+ File[] sentencesFiles = sentencesFileDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.contains("sentences") && name.endsWith(".txt");
@@ -59,7 +59,7 @@ public class LeipzigDocumentSampleStreamFactory
});
@SuppressWarnings("unchecked")
- ObjectStream<DocumentSample> sampleStreams[] =
+ ObjectStream<DocumentSample>[] sampleStreams =
new ObjectStream[sentencesFiles.length];
for (int i = 0; i < sentencesFiles.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
index 76e3d0f..efeddba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
@@ -40,7 +40,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
static final int ID_OFFSET = 0;
static final int TYPE_OFFSET = 1;
- BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+ BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
return null;
}
@@ -60,7 +60,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
private static final int END_OFFSET = 3;
@Override
- BratAnnotation parse(Span values[], CharSequence line) throws IOException {
+ BratAnnotation parse(Span[] values, CharSequence line) throws IOException {
if (values.length > 4) {
String type = values[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
@@ -111,7 +111,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
}
@Override
- BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+ BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
return new RelationAnnotation(tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString(),
tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString(),
parseArg(tokens[ARG1_OFFSET].getCoveredText(line).toString()),
@@ -122,7 +122,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
static class EventAnnotationParser extends BratAnnotationParser {
@Override
- BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+ BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
String[] typeParts = tokens[TYPE_OFFSET].getCoveredText(line).toString().split(":");
@@ -194,7 +194,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
String line = reader.readLine();
if (line != null) {
- Span tokens[] = WhitespaceTokenizer.INSTANCE.tokenizePos(line);
+ Span[] tokens = WhitespaceTokenizer.INSTANCE.tokenizePos(line);
if (tokens.length > 2) {
String annId = tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
index 16c9deb..1b9aee2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
@@ -78,7 +78,7 @@ public class BratDocument {
StringBuilder text = new StringBuilder();
- char cbuf[] = new char[1024];
+ char[] cbuf = new char[1024];
int len;
while ((len = txtReader.read(cbuf)) > 0) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
index a569992..5a96d2d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
@@ -111,7 +111,7 @@ public class BratNameSampleStream extends SegmenterObjectStream<BratDocument, Na
String sentenceText = sentence.getCoveredText(
sample.getText()).toString();
- Span tokens[] = tokenizer.tokenizePos(sentenceText);
+ Span[] tokens = tokenizer.tokenizePos(sentenceText);
// Note:
// A begin and end token index can be identical, but map to different
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
index 0367b95..b7dedbb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
@@ -38,7 +38,7 @@ public class FileToByteArraySampleStream extends FilterObjectStream<File, byte[]
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
- byte buffer[] = new byte[1024];
+ byte[] buffer = new byte[1024];
int length;
while ((length = in.read(buffer, 0, buffer.length)) > 0) {
bytes.write(buffer, 0, length);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
index 3ca641c..3b0476a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
@@ -45,7 +45,7 @@ public class FileToStringSampleStream extends FilterObjectStream<File, String> {
StringBuilder text = new StringBuilder();
try {
- char buffer[] = new char[1024];
+ char[] buffer = new char[1024];
int length;
while ((length = in.read(buffer, 0, buffer.length)) > 0) {
text.append(buffer, 0, length);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
index 4d6d3a4..e25d674 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
@@ -106,7 +106,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler {
@Override
public void characters(CharSequence chars) {
if (isInsideContentElement) {
- String tokens [] = tokenizer.tokenize(chars.toString());
+ String[] tokens = tokenizer.tokenize(chars.toString());
text.addAll(Arrays.asList(tokens));
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
index 41e5aa9..af2b5c8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
@@ -105,7 +105,7 @@ public class OntoNotesNameSampleStream extends
break;
}
- String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+ String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
List<Span> entities = new LinkedList<>();
List<String> cleanedTokens = new ArrayList<>(tokens.length);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
index 7056538..a4d5c8c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
@@ -41,9 +41,9 @@ public class LemmaSampleSequenceStream implements SequenceStream {
LemmaSample sample = samples.read();
if (sample != null) {
- String sentence[] = sample.getTokens();
- String tags[] = sample.getTags();
- String preds[] = sample.getLemmas();
+ String[] sentence = sample.getTokens();
+ String[] tags = sample.getTags();
+ String[] preds = sample.getLemmas();
Event[] events = new Event[sentence.length];
for (int i = 0; i < sentence.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index ddcaa6a..f21f9e3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -30,6 +30,6 @@ public interface Lemmatizer {
*
* @return an array of lemma classes for each token in the sequence.
*/
- String[] lemmatize(String[] toks, String tags[]);
+ String[] lemmatize(String[] toks, String[] tags);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
index 4ce8b7e..949a408 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
@@ -171,7 +171,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
public Sequence bestSequence(T[] sequence, Object[] additionalContext,
BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
- Sequence sequences[] = bestSequences(1, sequence, additionalContext, cg, validator);
+ Sequence[] sequences = bestSequences(1, sequence, additionalContext, cg, validator);
if (sequences.length > 0)
return sequences[0];
@@ -181,7 +181,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
@Override
public String[] getOutcomes() {
- String outcomes[] = new String[model.getNumOutcomes()];
+ String[] outcomes = new String[model.getNumOutcomes()];
for (int i = 0; i < model.getNumOutcomes(); i++) {
outcomes[i] = model.getOutcome(i);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
index 14c7fa3..b8b830e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
@@ -156,7 +156,7 @@ public final class GISModel extends AbstractModel {
public static double[] eval(int[] context, float[] values, double[] prior,
EvalParameters model) {
Context[] params = model.getParams();
- int numfeats[] = new int[model.getNumOutcomes()];
+ int[] numfeats = new int[model.getNumOutcomes()];
int[] activeOutcomes;
double[] activeParameters;
double value = 1;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
index ea26781..c0c8b1d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
@@ -42,7 +42,7 @@ public interface MaxentModel {
* outcomes, all of which sum to 1.
* @return an array of the probabilities for each of the different outcomes, all of which sum to 1.
**/
- double[] eval(String[] context, double probs[]);
+ double[] eval(String[] context, double[] probs);
/**
* Evaluates a contexts with the specified context values.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
index 3e4cef1..7a50055 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
@@ -250,7 +250,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
//training feature count computation
for (int ei = 0; ei < events.length; ei++, oei++) {
String[] contextStrings = events[ei].getContext();
- float values[] = events[ei].getValues();
+ float[] values = events[ei].getValues();
int oi = omap.get(events[ei].getOutcome());
for (int ci = 0; ci < contextStrings.length; ci++) {
float value = 1;
@@ -272,7 +272,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
// {System.err.print(" "+taggerEvents[ei].getOutcome());} System.err.println();
for (Event taggerEvent : taggerEvents) {
String[] contextStrings = taggerEvent.getContext();
- float values[] = taggerEvent.getValues();
+ float[] values = taggerEvent.getValues();
int oi = omap.get(taggerEvent.getOutcome());
for (int ci = 0; ci < contextStrings.length; ci++) {
float value = 1;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
index 284ae87..2218021 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
@@ -76,7 +76,7 @@ public class BioCodec implements SequenceCodec<String> {
return spans.toArray(new Span[spans.size()]);
}
- public String[] encode(Span names[], int length) {
+ public String[] encode(Span[] names, int length) {
String[] outcomes = new String[length];
for (int i = 0; i < outcomes.length; i++) {
outcomes[i] = BioCodec.OTHER;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
index 7f913f9..83318e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
@@ -36,7 +36,7 @@ import opennlp.tools.util.featuregen.WindowFeatureGenerator;
*/
public class DefaultNameContextGenerator implements NameContextGenerator {
- private AdaptiveFeatureGenerator featureGenerators[];
+ private AdaptiveFeatureGenerator[] featureGenerators;
@Deprecated
private static AdaptiveFeatureGenerator windowFeatures = new CachedFeatureGenerator(
@@ -73,7 +73,7 @@ public class DefaultNameContextGenerator implements NameContextGenerator {
}
public void addFeatureGenerator(AdaptiveFeatureGenerator generator) {
- AdaptiveFeatureGenerator generators[] = featureGenerators;
+ AdaptiveFeatureGenerator[] generators = featureGenerators;
featureGenerators = new AdaptiveFeatureGenerator[featureGenerators.length + 1];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
index 8b655eb..d186ef9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
@@ -62,7 +62,7 @@ public class DictionaryNameFinder implements TokenNameFinder {
for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) {
Span nameFound = null;
- String tokensSearching[];
+ String[] tokensSearching;
for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) {
int lengthSearching = offsetTo - offsetFrom + 1;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
index 0afd3c1..f67163c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
@@ -132,7 +132,7 @@ public class NameFinderEventStream extends opennlp.tools.util.AbstractEventStrea
overrideType(names);
}
- String outcomes[] = codec.encode(names, sample.getSentence().length);
+ String[] outcomes = codec.encode(names, sample.getSentence().length);
// String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length);
additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext());
String[] tokens = new String[sample.getSentence().length];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 5a16f34..1d52473 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -302,7 +302,7 @@ public class NameFinderME implements TokenNameFinder {
*
* @return non-overlapping spans
*/
- public static Span[] dropOverlappingSpans(Span spans[]) {
+ public static Span[] dropOverlappingSpans(Span[] spans) {
List<Span> sortedSpans = new ArrayList<>(spans.length);
Collections.addAll(sortedSpans, spans);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
index f2f4578..8858ceb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
@@ -179,7 +179,7 @@ public class NameSample {
return result.toString();
}
- private static String errorTokenWithContext(String sentence[], int index) {
+ private static String errorTokenWithContext(String[] sentence, int index) {
StringBuilder errorString = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
index cf19bf2..8064d6b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
@@ -86,8 +86,8 @@ public class NameSampleSequenceStream implements SequenceStream {
public Sequence read() throws IOException {
NameSample sample = psi.read();
if (sample != null) {
- String sentence[] = sample.getSentence();
- String tags[] = seqCodec.encode(sample.getNames(), sentence.length);
+ String[] sentence = sample.getSentence();
+ String[] tags = seqCodec.encode(sample.getNames(), sentence.length);
Event[] events = new Event[sentence.length];
for (int i = 0; i < sentence.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
index 5d9847d..7d7c6bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.Span;
*/
public final class RegexNameFinder implements TokenNameFinder {
- private Pattern mPatterns[];
+ private Pattern[] mPatterns;
private String sType;
private Map<String, Pattern[]> regexMap;
@@ -40,7 +40,7 @@ public final class RegexNameFinder implements TokenNameFinder {
this.regexMap = Objects.requireNonNull(regexMap, "regexMap must not be null");
}
- public RegexNameFinder(Pattern patterns[], String type) {
+ public RegexNameFinder(Pattern[] patterns, String type) {
if (patterns == null || patterns.length == 0) {
throw new IllegalArgumentException("patterns must not be null or empty!");
}
@@ -55,7 +55,7 @@ public final class RegexNameFinder implements TokenNameFinder {
* {@link #RegexNameFinder(Map)}
*/
@Deprecated
- public RegexNameFinder(Pattern patterns[]) {
+ public RegexNameFinder(Pattern[] patterns) {
if (patterns == null || patterns.length == 0) {
throw new IllegalArgumentException("patterns must not be null or empty!");
}
@@ -65,7 +65,7 @@ public final class RegexNameFinder implements TokenNameFinder {
}
@Override
- public Span[] find(String tokens[]) {
+ public Span[] find(String[] tokens) {
Map<Integer, Integer> sentencePosTokenMap = new HashMap<>();
StringBuilder sentenceString = new StringBuilder(tokens.length * 10);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
index 3b5162e..c9de988 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
@@ -30,7 +30,7 @@ public interface TokenNameFinder {
* @param tokens an array of the tokens or words of the sequence, typically a sentence.
* @return an array of spans for each of the names identified.
*/
- Span[] find(String tokens[]);
+ Span[] find(String[] tokens);
/**
* Forgets all adaptive data which was collected during previous
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
index 6a68b86..df8866f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
@@ -36,9 +36,9 @@ public class TokenNameFinderCrossValidator {
private class DocumentSample {
- private NameSample samples[];
+ private NameSample[] samples;
- DocumentSample(NameSample samples[]) {
+ DocumentSample(NameSample[] samples) {
this.samples = samples;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
index d58527b..a84ebb8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
@@ -73,8 +73,8 @@ public class TokenNameFinderEvaluator extends Evaluator<NameSample> {
nameFinder.clearAdaptiveData();
}
- Span predictedNames[] = nameFinder.find(reference.getSentence());
- Span references[] = reference.getNames();
+ Span[] predictedNames = nameFinder.find(reference.getSentence());
+ Span[] references = reference.getNames();
// OPENNLP-396 When evaluating with a file in the old format
// the type of the span is null, but must be set to default to match
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
index e7f0190..f570be3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
@@ -80,7 +80,7 @@ public class TokenNameFinderFactory extends BaseToolFactory {
throw new IllegalStateException("Classpath must contain ner-default-features.xml file!");
}
- byte buf[] = new byte[1024];
+ byte[] buf = new byte[1024];
int len;
while ((len = in.read(buf)) > 0) {
bytes.write(buf, 0, len);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index 09eefc5..5b72449 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -211,7 +211,7 @@ public class TokenNameFinderModel extends BaseModel {
private boolean isModelValid(MaxentModel model) {
- String outcomes[] = new String[model.getNumOutcomes()];
+ String[] outcomes = new String[model.getNumOutcomes()];
for (int i = 0; i < model.getNumOutcomes(); i++) {
outcomes[i] = model.getOutcome(i);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
index b0fc3e4..a553328 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
@@ -388,9 +388,9 @@ public abstract class AbstractBottomUpParser implements Parser {
protected Parse[] advanceChunks(final Parse p, double minChunkScore) {
// chunk
Parse[] children = p.getChildren();
- String words[] = new String[children.length];
- String ptags[] = new String[words.length];
- double probs[] = new double[words.length];
+ String[] words = new String[children.length];
+ String[] ptags = new String[words.length];
+ double[] probs = new double[words.length];
for (int i = 0, il = children.length; i < il; i++) {
Parse sp = children[i];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index b9733b6..7d37fcb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -160,7 +160,7 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
features.add(ctbo0 + "," + ct1);
features.add(ct0 + "," + ctbo1);
features.add(ctbo0 + "," + ctbo1);
- String contexts[] = features.toArray(new String[features.size()]);
+ String[] contexts = features.toArray(new String[features.size()]);
if (contextsCache != null) {
contextsCache.put(cacheKey,contexts);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
index b19d480..7d3c8f7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
@@ -35,7 +35,7 @@ public class ParserChunkerFactory extends ChunkerFactory {
MaxentModel model = artifactProvider.getArtifact("chunker.model");
- String outcomes[] = new String[model.getNumOutcomes()];
+ String[] outcomes = new String[model.getNumOutcomes()];
for (int i = 0; i < outcomes.length; i++) {
outcomes[i] = model.getOutcome(i);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
index 6b748a6..ef15bf5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
@@ -27,7 +27,7 @@ public class ParserChunkerSequenceValidator implements SequenceValidator<String>
private Map<String, String> continueStartMap;
- public ParserChunkerSequenceValidator(String outcomes[]) {
+ public ParserChunkerSequenceValidator(String[] outcomes) {
continueStartMap = new HashMap<>(outcomes.length);
for (int oi = 0, on = outcomes.length; oi < on; oi++) {
[16/24] opennlp git commit: OPENNLP-997: Exclude the generated
stemmer code from the coverage report, this closes apache/opennlp#135
Posted by co...@apache.org.
OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f60bfa2c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f60bfa2c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f60bfa2c
Branch: refs/heads/OPENNLP-778
Commit: f60bfa2ca28251c841e39899e1714dd1af21f192
Parents: c17c551
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 28 08:28:05 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Tue Feb 28 08:28:05 2017 -0500
----------------------------------------------------------------------
pom.xml | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/f60bfa2c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 45d3c37..12c9ee6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -126,7 +126,7 @@
<checkstyle.plugin.version>2.17</checkstyle.plugin.version>
<opennlp.forkCount>1.0C</opennlp.forkCount>
<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin>
- <jacoco.maven.plugin>0.7.8</jacoco.maven.plugin>
+ <jacoco.maven.plugin>0.7.9</jacoco.maven.plugin>
<maven.surefire.plugin>2.19.1</maven.surefire.plugin>
</properties>
@@ -185,7 +185,13 @@
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
- <version>0.7.8</version>
+ <version>${jacoco.maven.plugin}</version>
+ <configuration>
+ <excludes>
+ <exclude>**/stemmer/*</exclude>
+ <exclude>**/stemmer/snowball/*</exclude>
+ </excludes>
+ </configuration>
<executions>
<execution>
<id>jacoco-prepare-agent</id>
@@ -222,6 +228,10 @@
<version>${maven.surefire.plugin}</version>
<configuration>
<forkCount>${opennlp.forkCount}</forkCount>
+ <excludes>
+ <exclude>**/stemmer/*</exclude>
+ <exclude>**/stemmer/snowball/*</exclude>
+ </excludes>
</configuration>
</plugin>
[23/24] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class
Posted by co...@apache.org.
OPENNLP-1002 Remove deprecated GIS class
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cb9b00a6
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cb9b00a6
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cb9b00a6
Branch: refs/heads/OPENNLP-778
Commit: cb9b00a64c5173fdbbc34c16648e2f643a18d622
Parents: a9cfd7e
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Mar 12 11:10:43 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Mon Mar 13 16:59:22 2017 +0100
----------------------------------------------------------------------
.../cmdline/parser/BuildModelUpdaterTool.java | 7 +-
.../cmdline/parser/CheckModelUpdaterTool.java | 7 +-
.../main/java/opennlp/tools/ml/maxent/GIS.java | 303 -------------------
.../tools/ml/maxent/GISIndexingTest.java | 78 +++--
.../tools/ml/maxent/ScaleDoesntMatterTest.java | 20 +-
5 files changed, 80 insertions(+), 335 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb9b00a6/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
index 327355b..7efd342 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
import java.io.IOException;
import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.chunking.ParserEventStream;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
public final class BuildModelUpdaterTool extends ModelUpdaterTool {
@@ -50,7 +52,8 @@ public final class BuildModelUpdaterTool extends ModelUpdaterTool {
ObjectStream<Event> bes = new ParserEventStream(parseSamples,
originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict);
- GIS trainer = new GIS();
+ EventTrainer trainer = TrainerFactory.getEventTrainer(
+ ModelUtil.createDefaultTrainingParameters(), null);
MaxentModel buildModel = trainer.train(bes);
parseSamples.close();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb9b00a6/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
index 55e96ba..0c98812 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
import java.io.IOException;
import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.chunking.ParserEventStream;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
// trains a new check model ...
public final class CheckModelUpdaterTool extends ModelUpdaterTool {
@@ -51,7 +53,8 @@ public final class CheckModelUpdaterTool extends ModelUpdaterTool {
ObjectStream<Event> bes = new ParserEventStream(parseSamples,
originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict);
- GIS trainer = new GIS();
+ EventTrainer trainer = TrainerFactory.getEventTrainer(
+ ModelUtil.createDefaultTrainingParameters(), null);
MaxentModel checkModel = trainer.train(bes);
parseSamples.close();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb9b00a6/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
deleted file mode 100644
index 97c214d..0000000
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.ml.maxent;
-
-import java.io.IOException;
-
-import opennlp.tools.ml.AbstractEventTrainer;
-import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.ml.model.Prior;
-import opennlp.tools.ml.model.UniformPrior;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * A Factory class which uses instances of GISTrainer to create and train
- * GISModels.
- * @deprecated use {@link GISTrainer}
- */
-@Deprecated
-public class GIS extends AbstractEventTrainer {
-
- public static final String MAXENT_VALUE = "MAXENT";
-
- /**
- * Set this to false if you don't want messages about the progress of model
- * training displayed. Alternately, you can use the overloaded version of
- * trainModel() to conditionally enable progress messages.
- */
- public static boolean PRINT_MESSAGES = true;
-
- /**
- * If we are using smoothing, this is used as the "number" of times we want
- * the trainer to imagine that it saw a feature that it actually didn't see.
- * Defaulted to 0.1.
- */
- private static final double SMOOTHING_OBSERVATION = 0.1;
-
- private static final String SMOOTHING_PARAM = "smoothing";
- private static final boolean SMOOTHING_DEFAULT = false;
-
- public GIS() {
- }
-
- public GIS(TrainingParameters parameters) {
- super(parameters);
- }
-
- public boolean isValid() {
-
- if (!super.isValid()) {
- return false;
- }
-
- String algorithmName = getAlgorithm();
-
- return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
- }
-
- public boolean isSortAndMerge() {
- return true;
- }
-
- public AbstractModel doTrain(DataIndexer indexer) throws IOException {
- int iterations = getIterations();
-
- AbstractModel model;
-
- boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
- boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
- int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
-
- model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
-
- return model;
- }
-
- // << members related to AbstractEventTrainer
-
- /**
- * Train a model using the GIS algorithm, assuming 100 iterations and no
- * cutoff.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
- return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm, assuming 100 iterations and no
- * cutoff.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
- throws IOException {
- return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff) throws IOException {
- return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
- throws IOException {
- GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
- trainer.setSmoothing(smoothing);
- trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
- return trainer.trainModel(eventStream, iterations, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param eventStream
- * The EventStream holding the data on which this model will be
- * trained.
- * @param iterations
- * The number of GIS iterations to perform.
- * @param cutoff
- * The number of times a feature must be seen in order to be relevant
- * for training.
- * @param sigma
- * The standard deviation for the gaussian smoother.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
- int cutoff, double sigma) throws IOException {
- GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
- if (sigma > 0) {
- trainer.setGaussianSigma(sigma);
- }
- return trainer.trainModel(eventStream, iterations, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
- return trainModel(iterations, indexer, true, smoothing, null, 1);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer) {
- return trainModel(iterations, indexer, true, false, null, 1);
- }
-
- /**
- * Train a model using the GIS algorithm with the specified number of
- * iterations, data indexer, and prior.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- Prior modelPrior, int cutoff) {
- return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- boolean printMessagesWhileTraining, boolean smoothing,
- Prior modelPrior) {
- return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
- }
-
- /**
- * Train a model using the GIS algorithm.
- *
- * @param iterations
- * The number of GIS iterations to perform.
- * @param indexer
- * The object which will be used for event compilation.
- * @param printMessagesWhileTraining
- * Determines whether training status messages are written to STDOUT.
- * @param smoothing
- * Defines whether the created trainer will use smoothing while
- * training the model.
- * @param modelPrior
- * The prior distribution for the model.
- * @return The newly trained model, which can be used immediately or saved to
- * disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
- */
- public static GISModel trainModel(int iterations, DataIndexer indexer,
- boolean printMessagesWhileTraining, boolean smoothing,
- Prior modelPrior, int threads) {
- GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
- trainer.setSmoothing(smoothing);
- trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
- if (modelPrior == null) {
- modelPrior = new UniformPrior();
- }
- return trainer.trainModel(iterations, indexer, modelPrior, threads);
- }
-}
-
-
-
http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb9b00a6/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
index 6922603..c8bc27f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
@@ -17,6 +17,7 @@
package opennlp.tools.ml.maxent;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -26,6 +27,7 @@ import org.junit.Assert;
import org.junit.Test;
import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.AbstractTrainer;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.maxent.quasinewton.QNTrainer;
@@ -36,6 +38,7 @@ import opennlp.tools.ml.model.Event;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
public class GISIndexingTest {
@@ -58,40 +61,63 @@ public class GISIndexingTest {
* Test the GIS.trainModel(ObjectStream<Event> eventStream) method
*/
@Test
- public void testGISTrainSignature1() throws Exception {
- ObjectStream<Event> eventStream = createEventStream();
- Assert.assertNotNull(GIS.trainModel(eventStream));
- eventStream.close();
+ public void testGISTrainSignature1() throws IOException {
+ try (ObjectStream<Event> eventStream = createEventStream()) {
+ TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+ params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+
+ EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+
+ Assert.assertNotNull(trainer.train(eventStream));
+ }
}
/*
* Test the GIS.trainModel(ObjectStream<Event> eventStream,boolean smoothing) method
*/
@Test
- public void testGISTrainSignature2() throws Exception {
- ObjectStream<Event> eventStream = createEventStream();
- Assert.assertNotNull(GIS.trainModel(eventStream,true));
- eventStream.close();
+ public void testGISTrainSignature2() throws IOException {
+ try (ObjectStream<Event> eventStream = createEventStream()) {
+ TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+ params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+ params.put("smoothing", "true");
+ EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+
+ Assert.assertNotNull(trainer.train(eventStream));
+ }
}
/*
* Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff) method
*/
@Test
- public void testGISTrainSignature3() throws Exception {
- ObjectStream<Event> eventStream = createEventStream();
- Assert.assertNotNull(GIS.trainModel(eventStream,10,1));
- eventStream.close();
+ public void testGISTrainSignature3() throws IOException {
+ try (ObjectStream<Event> eventStream = createEventStream()) {
+ TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+ params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+ params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+
+ EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+
+ Assert.assertNotNull(trainer.train(eventStream));
+ }
}
/*
* Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff, double sigma) method
*/
@Test
- public void testGISTrainSignature4() throws Exception {
- ObjectStream<Event> eventStream = createEventStream();
- Assert.assertNotNull(GIS.trainModel(eventStream,10,1,0.01));
- eventStream.close();
+ public void testGISTrainSignature4() throws IOException {
+ try (ObjectStream<Event> eventStream = createEventStream()) {
+ TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+ params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+ params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+ GISTrainer trainer = (GISTrainer) TrainerFactory.getEventTrainer(params, null);
+ trainer.setGaussianSigma(0.01);
+
+ Assert.assertNotNull(trainer.trainModel(eventStream));
+ }
}
/*
@@ -99,14 +125,22 @@ public class GISIndexingTest {
* boolean smoothing, boolean printMessagesWhileTraining)) method
*/
@Test
- public void testGISTrainSignature5() throws Exception {
- ObjectStream<Event> eventStream = createEventStream();
- Assert.assertNotNull(GIS.trainModel(eventStream,10,1,false,false));
- eventStream.close();
+ public void testGISTrainSignature5() throws IOException {
+ try (ObjectStream<Event> eventStream = createEventStream()) {
+ TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+ params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+ params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+ params.put("smoothing", "false");
+ params.put(AbstractTrainer.VERBOSE_PARAM, "false");
+
+ EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+ Assert.assertNotNull(trainer.train(eventStream));
+ }
}
@Test
- public void testIndexingWithTrainingParameters() throws Exception {
+ public void testIndexingWithTrainingParameters() throws IOException {
ObjectStream<Event> eventStream = createEventStream();
TrainingParameters parameters = TrainingParameters.defaultParams();
@@ -147,7 +181,7 @@ public class GISIndexingTest {
}
@Test
- public void testIndexingFactory() throws Exception {
+ public void testIndexingFactory() throws IOException {
Map<String,String> myReportMap = new HashMap<>();
ObjectStream<Event> eventStream = createEventStream();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/cb9b00a6/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
index 76a4813..1e5c8a3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
@@ -25,6 +25,8 @@ import org.junit.Before;
import org.junit.Test;
import opennlp.tools.ml.AbstractTrainer;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.DataIndexer;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
@@ -34,6 +36,7 @@ import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
public class ScaleDoesntMatterTest {
@@ -52,7 +55,6 @@ public class ScaleDoesntMatterTest {
* predicates doesn't matter when it comes the probability assigned to each
* outcome. Strangely, if we use (1,2) and (10,20) there's no difference. If
* we use (0.1,0.2) and (10,20) there is a difference.
- *
*/
@Test
public void testScaleResults() throws Exception {
@@ -68,8 +70,11 @@ public class ScaleDoesntMatterTest {
new PlainTextByLineStream(new MockInputStreamFactory(smallValues), StandardCharsets.UTF_8));
testDataIndexer.index(smallEventStream);
- MaxentModel smallModel = GIS.trainModel(100,
- testDataIndexer, false);
+
+ EventTrainer smallModelTrainer = TrainerFactory.getEventTrainer(
+ ModelUtil.createDefaultTrainingParameters(), null);
+
+ MaxentModel smallModel = smallModelTrainer.train(testDataIndexer);
String[] contexts = smallTest.split(" ");
float[] values = RealValueFileEventStream.parseContexts(contexts);
double[] smallResults = smallModel.eval(contexts, values);
@@ -81,13 +86,16 @@ public class ScaleDoesntMatterTest {
new PlainTextByLineStream(new MockInputStreamFactory(largeValues), StandardCharsets.UTF_8));
testDataIndexer.index(largeEventStream);
- MaxentModel largeModel = GIS.trainModel(100,
- testDataIndexer, false);
+
+ EventTrainer largeModelTrainer = TrainerFactory.getEventTrainer(
+ ModelUtil.createDefaultTrainingParameters(), null);
+
+ MaxentModel largeModel = largeModelTrainer.train(testDataIndexer);
contexts = largeTest.split(" ");
values = RealValueFileEventStream.parseContexts(contexts);
double[] largeResults = largeModel.eval(contexts, values);
- String largeResultString = smallModel.getAllOutcomes(largeResults);
+ String largeResultString = largeModel.getAllOutcomes(largeResults);
System.out.println("largeResults: " + largeResultString);
Assert.assertEquals(smallResults.length, largeResults.length);
[15/24] opennlp git commit: OPENNLP-994: Remove deprecated methods
from the Document Categorizer, this closes apache/opennlp#133
Posted by co...@apache.org.
OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/c17c5511
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/c17c5511
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/c17c5511
Branch: refs/heads/OPENNLP-778
Commit: c17c55110b216ed3d5e0adb06734677a9cb04abd
Parents: a5bdd60
Author: smarthi <sm...@apache.org>
Authored: Mon Feb 27 17:23:40 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Mon Feb 27 17:23:40 2017 -0500
----------------------------------------------------------------------
.../doccat/DoccatCrossValidatorTool.java | 7 +-
.../tools/cmdline/doccat/DoccatTool.java | 11 +-
.../tools/cmdline/doccat/DoccatTrainerTool.java | 5 +-
.../opennlp/tools/doccat/DoccatFactory.java | 93 +----------------
.../tools/doccat/DocumentCategorizer.java | 54 ++--------
.../doccat/DocumentCategorizerEvaluator.java | 2 +-
.../tools/doccat/DocumentCategorizerME.java | 101 ++-----------------
.../opennlp/tools/doccat/DocumentSample.java | 6 --
.../formats/LeipzigDoccatSampleStream.java | 19 ++--
.../tools/doccat/DocumentCategorizerMETest.java | 18 ++--
.../tools/doccat/DocumentCategorizerNBTest.java | 17 ++--
.../tools/doccat/DocumentSampleTest.java | 4 +-
.../doccat/AbstractDocumentCategorizer.java | 29 +++---
.../java/opennlp/uima/util/AnnotatorUtil.java | 6 +-
14 files changed, 66 insertions(+), 306 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
index f0f1712..a73aba7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
@@ -36,7 +36,6 @@ import opennlp.tools.doccat.DoccatEvaluationMonitor;
import opennlp.tools.doccat.DoccatFactory;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.doccat.FeatureGenerator;
-import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.eval.EvaluationMonitor;
import opennlp.tools.util.model.ModelUtil;
@@ -84,16 +83,12 @@ public final class DoccatCrossValidatorTool extends
FeatureGenerator[] featureGenerators = DoccatTrainerTool
.createFeatureGenerators(params.getFeatureGenerators());
- Tokenizer tokenizer = DoccatTrainerTool.createTokenizer(params
- .getTokenizer());
-
DoccatEvaluationMonitor[] listenersArr = listeners
.toArray(new DoccatEvaluationMonitor[listeners.size()]);
DoccatCrossValidator validator;
try {
- DoccatFactory factory = DoccatFactory.create(params.getFactory(),
- tokenizer, featureGenerators);
+ DoccatFactory factory = DoccatFactory.create(params.getFactory(), featureGenerators);
validator = new DoccatCrossValidator(params.getLang(), mlParams,
factory, listenersArr);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
index a01d354..49a640c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
@@ -28,6 +28,7 @@ import opennlp.tools.cmdline.SystemInputStreamFactory;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -36,7 +37,7 @@ public class DoccatTool extends BasicCmdLineTool {
@Override
public String getShortDescription() {
- return "learnable document categorizer";
+ return "learned document categorizer";
}
@Override
@@ -53,7 +54,7 @@ public class DoccatTool extends BasicCmdLineTool {
DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
- DocumentCategorizerME doccat = new DocumentCategorizerME(model);
+ DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);
/*
* moved initialization to the try block to catch new IOException
@@ -68,10 +69,10 @@ public class DoccatTool extends BasicCmdLineTool {
new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()));
String document;
while ((document = documentStream.read()) != null) {
- String[] tokens = model.getFactory().getTokenizer().tokenize(document);
+ String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(document);
- double[] prob = doccat.categorize(tokens);
- String category = doccat.getBestCategory(prob);
+ double[] prob = documentCategorizerME.categorize(tokens);
+ String category = documentCategorizerME.getBestCategory(prob);
DocumentSample sample = new DocumentSample(category, tokens);
System.out.println(sample.toString());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
index 6ef5d88..8ebb5a8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
@@ -66,12 +66,9 @@ public class DoccatTrainerTool
FeatureGenerator[] featureGenerators = createFeatureGenerators(params
.getFeatureGenerators());
- Tokenizer tokenizer = createTokenizer(params.getTokenizer());
-
DoccatModel model;
try {
- DoccatFactory factory = DoccatFactory.create(params.getFactory(),
- tokenizer, featureGenerators);
+ DoccatFactory factory = DoccatFactory.create(params.getFactory(), featureGenerators);
model = DocumentCategorizerME.train(params.getLang(), sampleStream,
mlParams, factory);
} catch (IOException e) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
index a6c815b..babab7c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
@@ -22,8 +22,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ext.ExtensionLoader;
@@ -34,47 +32,17 @@ import opennlp.tools.util.ext.ExtensionLoader;
public class DoccatFactory extends BaseToolFactory {
private static final String FEATURE_GENERATORS = "doccat.featureGenerators";
- private static final String TOKENIZER_NAME = "doccat.tokenizer";
private FeatureGenerator[] featureGenerators;
- private Tokenizer tokenizer;
/**
* Creates a {@link DoccatFactory} that provides the default implementation of
* the resources.
*/
- public DoccatFactory() {
- this.tokenizer = WhitespaceTokenizer.INSTANCE;
- }
+ public DoccatFactory() {}
public DoccatFactory(final FeatureGenerator[] featureGenerators) {
- this.tokenizer = WhitespaceTokenizer.INSTANCE;
- this.featureGenerators = featureGenerators;
- }
-
- /**
- * Creates a {@link DoccatFactory}. Use this constructor to programmatically
- * create a factory.
- *
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- * @param tokenizer the tokenizer
- * @param featureGenerators the feature generators
- */
- @Deprecated
- public DoccatFactory(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
- this.init(tokenizer, featureGenerators);
- }
-
- /**
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- * @param tokenizer the tokenizer
- * @param featureGenerators feature generators
- */
- @Deprecated
- protected void init(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
-
this.featureGenerators = featureGenerators;
- this.tokenizer = tokenizer;
}
protected void init(FeatureGenerator[] featureGenerators) {
@@ -85,11 +53,6 @@ public class DoccatFactory extends BaseToolFactory {
public Map<String, String> createManifestEntries() {
Map<String, String> manifestEntries = super.createManifestEntries();
- if (getTokenizer() != null) {
- manifestEntries.put(TOKENIZER_NAME, getTokenizer().getClass()
- .getCanonicalName());
- }
-
if (getFeatureGenerators() != null) {
manifestEntries.put(FEATURE_GENERATORS, featureGeneratorsAsString());
}
@@ -115,31 +78,6 @@ public class DoccatFactory extends BaseToolFactory {
// nothing to validate
}
- /**
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- public static DoccatFactory create(String subclassName, Tokenizer tokenizer,
- FeatureGenerator[] featureGenerators) throws InvalidFormatException {
- if (subclassName == null) {
- // will create the default factory
- return new DoccatFactory(tokenizer, featureGenerators);
- }
- try {
- DoccatFactory theFactory = ExtensionLoader.instantiateExtension(
- DoccatFactory.class, subclassName);
- theFactory.init(tokenizer, featureGenerators);
- return theFactory;
- } catch (Exception e) {
- String msg = "Could not instantiate the " + subclassName
- + ". The initialization throw an exception.";
- System.err.println(msg);
- e.printStackTrace();
- throw new InvalidFormatException(msg, e);
- }
-
- }
-
public static DoccatFactory create(String subclassName, FeatureGenerator[] featureGenerators)
throws InvalidFormatException {
if (subclassName == null) {
@@ -192,33 +130,4 @@ public class DoccatFactory extends BaseToolFactory {
this.featureGenerators = featureGenerators;
}
- /**
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- public Tokenizer getTokenizer() {
- if (this.tokenizer == null) {
- if (artifactProvider != null) {
- String className = artifactProvider.getManifestProperty(TOKENIZER_NAME);
- if (className != null) {
- this.tokenizer = ExtensionLoader.instantiateExtension(
- Tokenizer.class, className);
- }
- }
- if (this.tokenizer == null) { // could not load using artifact provider
- this.tokenizer = WhitespaceTokenizer.INSTANCE;
- }
- }
- return tokenizer;
- }
-
- /**
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- * @param tokenizer tokenizer
- */
- @Deprecated
- public void setTokenizer(Tokenizer tokenizer) {
- this.tokenizer = tokenizer;
- }
-
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
index 88bf8f9..b180549 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
@@ -27,23 +27,21 @@ import java.util.SortedMap;
public interface DocumentCategorizer {
/**
- * Categorizes the given text, provided in separate tokens.
+ * Categorize the given text provided as tokens along with
+ * the provided extra information
*
* @param text the tokens of text to categorize
+ * @param extraInformation extra information
* @return per category probabilities
*/
- double[] categorize(String[] text);
+ double[] categorize(String[] text, Map<String, Object> extraInformation);
/**
* Categorizes the given text, provided in separate tokens.
- *
- * @param text the tokens of text to categorize
- * @param extraInformation optional extra information to pass for evaluation
+ * @param text the tokens of text to categorize
* @return per category probabilities
- * @deprecated will be removed after 1.7.1 release. Don't use it.
*/
- @Deprecated
- double[] categorize(String[] text, Map<String, Object> extraInformation);
+ double[] categorize(String[] text);
/**
* get the best category from previously generated outcome probabilities
@@ -77,25 +75,6 @@ public interface DocumentCategorizer {
int getNumberOfCategories();
/**
- * categorize a piece of text
- *
- * @param documentText the text to categorize
- * @return the probabilities of each category (sum up to 1)
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- double[] categorize(String documentText);
-
- /**
- * categorize a piece of text, providing extra metadata.
- *
- * @param documentText the text to categorize
- * @param extraInformation extra metadata
- * @return the probabilities of each category (sum up to 1)
- */
- double[] categorize(String documentText, Map<String, Object> extraInformation);
-
- /**
* get the name of the category associated with the given probabilties
*
* @param results the probabilities of each category
@@ -108,16 +87,6 @@ public interface DocumentCategorizer {
*
* @param text the input text to classify
* @return a map with the score as a key. The value is a Set of categories with the score.
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- Map<String, Double> scoreMap(String text);
-
- /**
- * Returns a map in which the key is the category name and the value is the score
- *
- * @param text the input text to classify
- * @return a map with the score as a key. The value is a Set of categories with the score.
*/
Map<String, Double> scoreMap(String[] text);
@@ -127,17 +96,6 @@ public interface DocumentCategorizer {
*
* @param text the input text to classify
* @return a map with the score as a key. The value is a Set of categories with the score.
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- SortedMap<Double, Set<String>> sortedScoreMap(String text);
-
- /**
- * Get a map of the scores sorted in ascending aorder together with their associated categories.
- * Many categories can have the same score, hence the Set as value
- *
- * @param text the input text to classify
- * @return a map with the score as a key. The value is a Set of categories with the score.
*/
SortedMap<Double, Set<String>> sortedScoreMap(String[] text);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
index 63e0768..c501280 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
@@ -59,7 +59,7 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
String[] document = sample.getText();
- double[] probs = categorizer.categorize(document, sample.getExtraInformation());
+ double[] probs = categorizer.categorize(document);
String cat = categorizer.getBestCategory(probs);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index e743b9d..9dc41d7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -29,8 +29,6 @@ import java.util.TreeMap;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
@@ -48,22 +46,6 @@ public class DocumentCategorizerME implements DocumentCategorizer {
private DocumentCategorizerContextGenerator mContextGenerator;
/**
- * Initializes the current instance with a doccat model and custom feature
- * generation. The feature generation must be identical to the configuration
- * at training time.
- *
- * @param model the doccat model
- * @param featureGenerators the feature generators
- * @deprecated train a {@link DoccatModel} with a specific
- * {@link DoccatFactory} to customize the {@link FeatureGenerator}s
- */
- @Deprecated
- public DocumentCategorizerME(DoccatModel model, FeatureGenerator... featureGenerators) {
- this.model = model;
- this.mContextGenerator = new DocumentCategorizerContextGenerator(featureGenerators);
- }
-
- /**
* Initializes the current instance with a doccat model. Default feature
* generation is used.
*
@@ -75,6 +57,13 @@ public class DocumentCategorizerME implements DocumentCategorizer {
.getFactory().getFeatureGenerators());
}
+ /**
+ * Categorize the given text provided as tokens along with
+ * the provided extra information
+ *
+ * @param text text tokens to categorize
+ * @param extraInformation additional information
+ */
@Override
public double[] categorize(String[] text, Map<String, Object> extraInformation) {
return model.getMaxentModel().eval(
@@ -83,58 +72,15 @@ public class DocumentCategorizerME implements DocumentCategorizer {
/**
* Categorizes the given text.
+ *
* @param text the text to categorize
*/
+ @Override
public double[] categorize(String[] text) {
return this.categorize(text, Collections.emptyMap());
}
/**
- * Categorizes the given text. The Tokenizer is obtained from
- * {@link DoccatFactory#getTokenizer()} and defaults to
- * {@link SimpleTokenizer}.
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- @Override
- public double[] categorize(String documentText,
- Map<String, Object> extraInformation) {
- Tokenizer tokenizer = model.getFactory().getTokenizer();
- return categorize(tokenizer.tokenize(documentText), extraInformation);
- }
-
- /**
- * Categorizes the given text. The text is tokenized with the SimpleTokenizer
- * before it is passed to the feature generation.
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- public double[] categorize(String documentText) {
- Tokenizer tokenizer = model.getFactory().getTokenizer();
- return categorize(tokenizer.tokenize(documentText), Collections.emptyMap());
- }
-
- /**
- * Returns a map in which the key is the category name and the value is the score
- *
- * @param text the input text to classify
- * @return the score map
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- public Map<String, Double> scoreMap(String text) {
- Map<String, Double> probDist = new HashMap<>();
-
- double[] categorize = categorize(text);
- int catSize = getNumberOfCategories();
- for (int i = 0; i < catSize; i++) {
- String category = getCategory(i);
- probDist.put(category, categorize[getIndex(category)]);
- }
- return probDist;
- }
-
- /**
* Returns a map in which the key is the category name and the value is the score
*
* @param text the input text to classify
@@ -160,35 +106,6 @@ public class DocumentCategorizerME implements DocumentCategorizer {
*
* @param text the input text to classify
* @return the sorted score map
- * @deprecated will be removed after 1.7.1 release. Don't use it.
- */
- @Deprecated
- @Override
- public SortedMap<Double, Set<String>> sortedScoreMap(String text) {
- SortedMap<Double, Set<String>> descendingMap = new TreeMap<>();
- double[] categorize = categorize(text);
- int catSize = getNumberOfCategories();
- for (int i = 0; i < catSize; i++) {
- String category = getCategory(i);
- double score = categorize[getIndex(category)];
- if (descendingMap.containsKey(score)) {
- descendingMap.get(score).add(category);
- } else {
- Set<String> newset = new HashSet<>();
- newset.add(category);
- descendingMap.put(score, newset);
- }
- }
- return descendingMap;
- }
-
- /**
- * Returns a map with the score as a key in ascending order.
- * The value is a Set of categories with the score.
- * Many categories can have the same score, hence the Set as value
- *
- * @param text the input text to classify
- * @return the sorted score map
*/
@Override
public SortedMap<Double, Set<String>> sortedScoreMap(String[] text) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
index 3d107fa..adddc27 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
@@ -24,8 +24,6 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
/**
* Class which holds a classified document and its category.
*/
@@ -35,10 +33,6 @@ public class DocumentSample {
private final List<String> text;
private final Map<String, Object> extraInformation;
- public DocumentSample(String category, String text) {
- this(category, WhitespaceTokenizer.INSTANCE.tokenize(text));
- }
-
public DocumentSample(String category, String[] text) {
this(category, text, null);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
index 1ca0484..8ed0036 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
@@ -20,6 +20,9 @@ package opennlp.tools.formats;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
@@ -36,7 +39,7 @@ import opennlp.tools.util.PlainTextByLineStream;
* <p>
* The input text is tokenized with the {@link SimpleTokenizer}. The input text classified
* by the language model must also be tokenized by the {@link SimpleTokenizer} to produce
- * exactly the same tokenization during testing and training.
+ * exactly the same tokenization during testing and training.
*/
public class LeipzigDoccatSampleStream extends
FilterObjectStream<String, DocumentSample> {
@@ -79,10 +82,8 @@ public class LeipzigDoccatSampleStream extends
}
public DocumentSample read() throws IOException {
-
int count = 0;
-
- StringBuilder sampleText = new StringBuilder();
+ List<String> tokensList = new ArrayList<>();
String line;
while (count < sentencesPerDocument && (line = samples.read()) != null) {
@@ -94,17 +95,13 @@ public class LeipzigDoccatSampleStream extends
}
// Always skip first token, that is the sentence number!
- for (int i = 1; i < tokens.length; i++) {
- sampleText.append(tokens[i]);
- sampleText.append(' ');
- }
+ tokensList.addAll(Arrays.asList(tokens).subList(1, tokens.length));
count++;
}
-
- if (sampleText.length() > 0) {
- return new DocumentSample(language, sampleText.toString());
+ if (tokensList.size() > 0) {
+ return new DocumentSample(language, tokensList.toArray(new String[tokensList.size()]));
}
return null;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
index 6389530..220df87 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
@@ -42,27 +42,23 @@ public class DocumentCategorizerMETest {
new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
TrainingParameters params = new TrainingParameters();
- params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
- params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+ params.put(TrainingParameters.ITERATIONS_PARAM, "100");
+ params.put(TrainingParameters.CUTOFF_PARAM, "0");
DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
params, new DoccatFactory());
DocumentCategorizer doccat = new DocumentCategorizerME(model);
- double[] aProbs = doccat.categorize("a");
+ double[] aProbs = doccat.categorize(new String[]{"a"});
Assert.assertEquals("1", doccat.getBestCategory(aProbs));
- double[] bProbs = doccat.categorize("x");
+ double[] bProbs = doccat.categorize(new String[]{"x"});
Assert.assertEquals("0", doccat.getBestCategory(bProbs));
//test to make sure sorted map's last key is cat 1 because it has the highest score.
- SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap("a");
- for (String cat : sortedScoreMap.get(sortedScoreMap.lastKey())) {
- Assert.assertEquals("1", cat);
- break;
- }
- System.out.println("");
-
+ SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
+ Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
+ Assert.assertEquals(1, cat.size());
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
index de3f098..0847690 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
@@ -44,8 +44,8 @@ public class DocumentCategorizerNBTest {
new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
TrainingParameters params = new TrainingParameters();
- params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
- params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+ params.put(TrainingParameters.ITERATIONS_PARAM, "100");
+ params.put(TrainingParameters.CUTOFF_PARAM, "0");
params.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
@@ -53,19 +53,16 @@ public class DocumentCategorizerNBTest {
DocumentCategorizer doccat = new DocumentCategorizerME(model);
- double[] aProbs = doccat.categorize("a");
+ double[] aProbs = doccat.categorize(new String[]{"a"});
Assert.assertEquals("1", doccat.getBestCategory(aProbs));
- double[] bProbs = doccat.categorize("x");
+ double[] bProbs = doccat.categorize(new String[]{"x"});
Assert.assertEquals("0", doccat.getBestCategory(bProbs));
//test to make sure sorted map's last key is cat 1 because it has the highest score.
- SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap("a");
- for (String cat : sortedScoreMap.get(sortedScoreMap.lastKey())) {
- Assert.assertEquals("1", cat);
- break;
- }
- System.out.println("");
+ SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
+ Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
+ Assert.assertEquals(1, cat.size());
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
index 232158b..8cf8fef 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
@@ -31,11 +31,11 @@ public class DocumentSampleTest {
}
public static DocumentSample createGoldSample() {
- return new DocumentSample("aCategory", "a small text");
+ return new DocumentSample("aCategory", new String[]{"a", "small", "text"});
}
public static DocumentSample createPredSample() {
- return new DocumentSample("anotherCategory", "a small text");
+ return new DocumentSample("anotherCategory", new String[]{"a", "small", "text"});
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index db9c075..4b49dca 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -17,12 +17,17 @@
package opennlp.uima.doccat;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
@@ -72,29 +77,25 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
mCategorizer = new DocumentCategorizerME(model);
}
- public void typeSystemInit(TypeSystem typeSystem)
- throws AnalysisEngineProcessException {
+ public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
- UimaUtil.SENTENCE_TYPE_PARAMETER);
+ UimaUtil.TOKEN_TYPE_PARAMETER);
}
protected abstract void setBestCategory(CAS cas, String bestCategory);
public void process(CAS cas) {
- double[] result;
-
- if (mTokenType != null) {
- // TODO:
- // count tokens
- // create token array
- // pass array to doccat
- // create result annotation
- result = mCategorizer.categorize(cas.getDocumentText());
- } else {
- result = mCategorizer.categorize(cas.getDocumentText());
+ FSIterator<AnnotationFS> tokenAnnotations = cas.getAnnotationIndex(mTokenType).iterator();
+ List<String> tokensList = new ArrayList<>();
+
+ while (tokenAnnotations.hasNext()) {
+ tokensList.add(tokenAnnotations.next().getCoveredText());
}
+ double[] result =
+ mCategorizer.categorize(tokensList.toArray(new String[tokensList.size()]));
+
String bestCategory = mCategorizer.getBestCategory(result);
setBestCategory(cas, bestCategory);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c17c5511/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
index 8847107..730d6be 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
@@ -329,8 +329,7 @@ public final class AnnotatorUtil {
} else {
throw new ResourceInitializationException(
ExceptionMessages.MESSAGE_CATALOG,
- ExceptionMessages.WRONG_PARAMETER_TYPE, new Object[] {parameter,
- "String array"});
+ ExceptionMessages.WRONG_PARAMETER_TYPE, new Object[] {parameter, "String array"});
}
}
@@ -443,8 +442,7 @@ public final class AnnotatorUtil {
if (inResource == null) {
throw new ResourceInitializationException(
ExceptionMessages.MESSAGE_CATALOG,
- ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {name
- + " could not be found!"});
+ ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {name + " could not be found!"});
}
return inResource;
[09/24] opennlp git commit: NoJira: Run jacoco during build and not
afterwards
Posted by co...@apache.org.
NoJira: Run jacoco during build and not afterwards
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5be5b527
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5be5b527
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5be5b527
Branch: refs/heads/OPENNLP-778
Commit: 5be5b527f5c8b46d36e51b8757650695855765a8
Parents: c5a15b2
Author: Jörn Kottmann <jo...@apache.org>
Authored: Mon Feb 20 14:22:56 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Mon Feb 20 15:54:35 2017 +0100
----------------------------------------------------------------------
.travis.yml | 4 ++--
pom.xml | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/5be5b527/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 49d902e..b3399b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ before_install:
- export M2_HOME=$PWD/apache-maven-3.3.9
- export PATH=$M2_HOME/bin:$PATH
-script: mvn clean install
+script: mvn clean install -Pjacoco
after_success:
- - mvn clean test -Pjacoco jacoco:report coveralls:report
+ - mvn jacoco:report coveralls:report
http://git-wip-us.apache.org/repos/asf/opennlp/blob/5be5b527/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 8e37452..45d3c37 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,9 +214,6 @@
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>${coveralls.maven.plugin}</version>
- <configuration>
- <repoToken>BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk</repoToken>
- </configuration>
</plugin>
<plugin>
@@ -397,6 +394,9 @@
<profile>
<id>jacoco</id>
+ <properties>
+ <opennlp.forkCount>1</opennlp.forkCount>
+ </properties>
<build>
<plugins>
<plugin>
[04/24] opennlp git commit: OPENNLP-990 Fix all array style
violations and add a checkstyle rule
Posted by co...@apache.org.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
index 1e90ecc..259d9f4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
@@ -37,8 +37,8 @@ public class PosSampleStream extends FilterObjectStream<Parse, POSSample> {
Parse[] nodes = parse.getTagNodes();
- String toks[] = new String[nodes.length];
- String preds[] = new String[nodes.length];
+ String[] toks = new String[nodes.length];
+ String[] preds = new String[nodes.length];
for (int ti = 0; ti < nodes.length; ti++) {
Parse tok = nodes[ti];
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index f103450..5f5eb25 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -99,7 +99,7 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary {
return dictionary.keySet().iterator();
}
- private static String tagsToString(String tags[]) {
+ private static String tagsToString(String[] tags) {
StringBuilder tagString = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
index 26cb79c..eaf6baf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
@@ -57,8 +57,8 @@ public class POSEvaluator extends Evaluator<POSSample> {
@Override
protected POSSample processSample(POSSample reference) {
- String predictedTags[] = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
- String referenceTags[] = reference.getTags();
+ String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
+ String[] referenceTags = reference.getTags();
for (int i = 0; i < referenceTags.length; i++) {
if (referenceTags[i].equals(predictedTags[i])) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
index b1b2d32..9512e38 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
@@ -37,7 +37,7 @@ public class POSSample {
private final String[][] additionalContext;
- public POSSample(String sentence[], String tags[]) {
+ public POSSample(String[] sentence, String[] tags) {
this(sentence, tags, null);
}
@@ -66,7 +66,7 @@ public class POSSample {
this.additionalContext = ac;
}
- public POSSample(String sentence[], String tags[],
+ public POSSample(String[] sentence, String[] tags,
String[][] additionalContext) {
this(Arrays.asList(sentence), Arrays.asList(tags), additionalContext);
}
@@ -120,10 +120,10 @@ public class POSSample {
public static POSSample parse(String sentenceString) throws InvalidFormatException {
- String tokenTags[] = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
+ String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
- String sentence[] = new String[tokenTags.length];
- String tags[] = new String[tokenTags.length];
+ String[] sentence = new String[tokenTags.length];
+ String[] tags = new String[tokenTags.length];
for (int i = 0; i < tokenTags.length; i++) {
int split = tokenTags[i].lastIndexOf("_");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
index 4dd31e0..aa3c99d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
@@ -63,9 +63,9 @@ public class POSSampleEventStream extends AbstractEventStream<POSSample> {
@Override
protected Iterator<Event> createEvents(POSSample sample) {
- String sentence[] = sample.getSentence();
- String tags[] = sample.getTags();
- Object ac[] = sample.getAddictionalContext();
+ String[] sentence = sample.getSentence();
+ String[] tags = sample.getTags();
+ Object[] ac = sample.getAddictionalContext();
List<Event> events = generateEvents(sentence, tags, ac, cg);
return events.iterator();
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
index b81fc48..9942d67 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
@@ -59,8 +59,8 @@ public class POSSampleSequenceStream implements SequenceStream {
POSSample sample = psi.read();
if (sample != null) {
- String sentence[] = sample.getSentence();
- String tags[] = sample.getTags();
+ String[] sentence = sample.getSentence();
+ String[] tags = sample.getTags();
Event[] events = new Event[sentence.length];
for (int i = 0; i < sentence.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index 91bda34..75d0ec0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -28,14 +28,14 @@ import java.util.List;
*/
public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
- private char eosCharacters[];
+ private char[] eosCharacters;
/**
* Initializes the current instance.
*
* @param eosCharacters
*/
- public DefaultEndOfSentenceScanner(char eosCharacters[]) {
+ public DefaultEndOfSentenceScanner(char[] eosCharacters) {
this.eosCharacters = eosCharacters;
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
index b246327..3effda8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
@@ -51,8 +51,8 @@ public class SentenceDetectorEvaluator extends Evaluator<SentenceSample> {
this.sentenceDetector = sentenceDetector;
}
- private Span[] trimSpans(String document, Span spans[]) {
- Span trimedSpans[] = new Span[spans.length];
+ private Span[] trimSpans(String document, Span[] spans) {
+ Span[] trimedSpans = new Span[spans.length];
for (int i = 0; i < spans.length; i++) {
trimedSpans[i] = spans[i].trim(document);
@@ -63,7 +63,7 @@ public class SentenceDetectorEvaluator extends Evaluator<SentenceSample> {
@Override
protected SentenceSample processSample(SentenceSample sample) {
- Span predictions[] =
+ Span[] predictions =
trimSpans(sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument()));
Span[] references = trimSpans(sample.getDocument(), sample.getSentences());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index 2f3fd6c..b5ad804 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -128,7 +128,7 @@ public class SentenceDetectorME implements SentenceDetector {
*/
public String[] sentDetect(String s) {
Span[] spans = sentPosDetect(s);
- String sentences[];
+ String[] sentences;
if (spans.length != 0) {
sentences = new String[spans.length];
for (int si = 0; si < spans.length; si++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
index 1f1b79a..dbbd193 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
@@ -53,7 +53,7 @@ public class SentenceSample {
StringBuilder documentBuilder = new StringBuilder();
- for (String sentenceTokens[] : sentences) {
+ for (String[] sentenceTokens : sentences) {
String sampleSentence = detokenizer.detokenize(sentenceTokens, null);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
index 9ffe649..55eca1c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
@@ -87,8 +87,8 @@ public class DetokenizationDictionary {
* @param operations an array of operations which specifies which operation
* should be used for the provided tokens
*/
- public DetokenizationDictionary(String tokens[],
- DetokenizationDictionary.Operation operations[]) {
+ public DetokenizationDictionary(String[] tokens,
+ DetokenizationDictionary.Operation[] operations) {
if (tokens.length != operations.length)
throw new IllegalArgumentException("tokens and ops must have the same length: tokens=" +
tokens.length + ", operations=" + operations.length + "!");
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
index 3af8597..acb9f45 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
@@ -57,7 +57,7 @@ public interface Detokenizer {
* @param tokens the tokens to detokenize.
* @return the merge operations to detokenize the input tokens.
*/
- DetokenizationOperation[] detokenize(String tokens[]);
+ DetokenizationOperation[] detokenize(String[] tokens);
/**
* Detokenize the input tokens into a String. Tokens which
@@ -69,5 +69,5 @@ public interface Detokenizer {
*
* @return the concatenated tokens
*/
- String detokenize(String tokens[], String splitMarker);
+ String detokenize(String[] tokens, String splitMarker);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
index 33eab9e..d53eefa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
@@ -37,7 +37,7 @@ public class DictionaryDetokenizer implements Detokenizer {
public DetokenizationOperation[] detokenize(String[] tokens) {
- DetokenizationOperation operations[] = new DetokenizationOperation[tokens.length];
+ DetokenizationOperation[] operations = new DetokenizationOperation[tokens.length];
Set<String> matchingTokens = new HashSet<>();
@@ -79,9 +79,9 @@ public class DictionaryDetokenizer implements Detokenizer {
return operations;
}
- public String detokenize(String tokens[], String splitMarker) {
+ public String detokenize(String[] tokens, String splitMarker) {
- DetokenizationOperation operations[] = detokenize(tokens);
+ DetokenizationOperation[] operations = detokenize(tokens);
if (tokens.length != operations.length)
throw new IllegalArgumentException("tokens and operations array must have same length: tokens=" +
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
index dd9745e..39b8a80 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
@@ -99,7 +99,7 @@ public class TokSpanEventStream extends AbstractEventStream<TokenSample> {
List<Event> events = new ArrayList<>(50);
- Span tokens[] = tokenSample.getTokenSpans();
+ Span[] tokens = tokenSample.getTokenSpans();
String text = tokenSample.getText();
if (tokens.length > 0) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
index f895e7d..3ec3b8d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
@@ -45,7 +45,7 @@ public class TokenSample {
* @param text the text which contains the tokens.
* @param tokenSpans the spans which mark the begin and end of the tokens.
*/
- public TokenSample(String text, Span tokenSpans[]) {
+ public TokenSample(String text, Span[] tokenSpans) {
Objects.requireNonNull(tokenSpans, "tokenSpans must not be null");
this.text = Objects.requireNonNull(text, "text must not be null");
@@ -60,7 +60,7 @@ public class TokenSample {
}
}
- public TokenSample(Detokenizer detokenizer, String tokens[]) {
+ public TokenSample(Detokenizer detokenizer, String[] tokens) {
StringBuilder sentence = new StringBuilder();
@@ -159,7 +159,7 @@ public class TokenSample {
Objects.requireNonNull(sampleString, "sampleString must not be null");
Objects.requireNonNull(separatorChars, "separatorChars must not be null");
- Span whitespaceTokenSpans[] = WhitespaceTokenizer.INSTANCE.tokenizePos(sampleString);
+ Span[] whitespaceTokenSpans = WhitespaceTokenizer.INSTANCE.tokenizePos(sampleString);
// Pre-allocate 20% for newly created tokens
List<Span> realTokenSpans = new ArrayList<>((int) (whitespaceTokenSpans.length * 1.2d));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
index b70898a..fa4d35b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
@@ -55,7 +55,7 @@ public class TokenizerEvaluator extends Evaluator<TokenSample> {
@Override
protected TokenSample processSample(TokenSample reference) {
- Span predictions[] = tokenizer.tokenizePos(reference.getText());
+ Span[] predictions = tokenizer.tokenizePos(reference.getText());
fmeasure.updateScores(reference.getTokenSpans(), predictions);
return new TokenSample(reference.getText(), predictions);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
index 2feb26d..bfb87c5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
@@ -41,7 +41,7 @@ public class TokenizerStream implements ObjectStream<TokenSample> {
String inputString = input.read();
if (inputString != null) {
- Span tokens[] = tokenizer.tokenizePos(inputString);
+ Span[] tokens = tokenizer.tokenizePos(inputString);
return new TokenSample(inputString, tokens);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java b/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
index 934cbac..c6c474d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
@@ -38,7 +38,7 @@ public interface SequenceCodec<T> {
*
* @return
*/
- T[] encode(Span names[], int length);
+ T[] encode(Span[] names, int length);
/**
* Creates a sequence validator which can validate a sequence of outcomes.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
index 5736cf4..e589c93 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
@@ -27,7 +27,7 @@ import java.util.Objects;
*/
public class StringList implements Iterable<String> {
- private String tokens[];
+ private String[] tokens;
/**
* Initializes the current instance.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
index 6682ec0..3ed769b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
@@ -73,7 +73,7 @@ public class StringUtil {
* @return lower cased String
*/
public static String toLowerCase(CharSequence string) {
- char lowerCaseChars[] = new char[string.length()];
+ char[] lowerCaseChars = new char[string.length()];
for (int i = 0; i < string.length(); i++) {
lowerCaseChars[i] = Character.toLowerCase(string.charAt(i));
@@ -91,7 +91,7 @@ public class StringUtil {
* @return upper cased String
*/
public static String toUpperCase(CharSequence string) {
- char upperCaseChars[] = new char[string.length()];
+ char[] upperCaseChars = new char[string.length()];
for (int i = 0; i < string.length(); i++) {
upperCaseChars[i] = Character.toUpperCase(string.charAt(i));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
index efcfce4..e7cf25c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
@@ -21,7 +21,7 @@ import java.util.List;
public class DocumentBeginFeatureGenerator implements AdaptiveFeatureGenerator {
- private String firstSentence[];
+ private String[] firstSentence;
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
index 8cee38e..da67204 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
@@ -33,9 +33,9 @@ public class InSpanGenerator implements AdaptiveFeatureGenerator {
private final TokenNameFinder finder;
- private String currentSentence[];
+ private String[] currentSentence;
- private Span currentNames[];
+ private Span[] currentNames;
/**
* Initializes the current instance.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
index 11b5f97..93b2122 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
@@ -56,7 +56,7 @@ public class WordClusterDictionary implements SerializableArtifact {
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
String line;
while ((line = reader.readLine()) != null) {
- String parts[] = line.split(" ");
+ String[] parts = line.split(" ");
if (parts.length == 3) {
tokenToClusterMap.put(parts[0], parts[1].intern());
} else if (parts.length == 2) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
index 85f6e12..bcba6ea 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
@@ -116,7 +116,7 @@ public final class ModelUtil {
ByteArrayOutputStream byteArrayOut = new ByteArrayOutputStream();
int length;
- byte buffer[] = new byte[1024];
+ byte[] buffer = new byte[1024];
while ((length = in.read(buffer)) > 0) {
byteArrayOut.write(buffer, 0, length);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
index c939442..51112df 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
@@ -85,7 +85,7 @@ public class ChunkerMETest {
@Test
public void testChunkAsArray() throws Exception {
- String preds[] = chunker.chunk(toks1, tags1);
+ String[] preds = chunker.chunk(toks1, tags1);
Assert.assertArrayEquals(expect1, preds);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
index c4b0516..b214b26 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
@@ -120,7 +120,7 @@ public class ArgumentParserTest {
@Test
public void testSimpleArgumentsUsage() {
- String arguments[] = new String[] {"-encoding charset",
+ String[] arguments = new String[] {"-encoding charset",
"[-iterations num]",
"[-alphaNumOpt true|false]"};
@@ -144,7 +144,7 @@ public class ArgumentParserTest {
@Test
public void testDefaultEncodingParameter() {
- String args[] = "-something aValue".split(" ");
+ String[] args = "-something aValue".split(" ");
Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);
@@ -162,7 +162,7 @@ public class ArgumentParserTest {
}
}
- String args[] = ("-something aValue -encoding " + notTheDefaultCharset).split(" ");
+ String[] args = ("-something aValue -encoding " + notTheDefaultCharset).split(" ");
Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
index d569e74..6389530 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
@@ -50,10 +50,10 @@ public class DocumentCategorizerMETest {
DocumentCategorizer doccat = new DocumentCategorizerME(model);
- double aProbs[] = doccat.categorize("a");
+ double[] aProbs = doccat.categorize("a");
Assert.assertEquals("1", doccat.getBestCategory(aProbs));
- double bProbs[] = doccat.categorize("x");
+ double[] bProbs = doccat.categorize("x");
Assert.assertEquals("0", doccat.getBestCategory(bProbs));
//test to make sure sorted map's last key is cat 1 because it has the highest score.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
index ae97840..de3f098 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
@@ -53,10 +53,10 @@ public class DocumentCategorizerNBTest {
DocumentCategorizer doccat = new DocumentCategorizerME(model);
- double aProbs[] = doccat.categorize("a");
+ double[] aProbs = doccat.categorize("a");
Assert.assertEquals("1", doccat.getBestCategory(aProbs));
- double bProbs[] = doccat.categorize("x");
+ double[] bProbs = doccat.categorize("x");
Assert.assertEquals("0", doccat.getBestCategory(bProbs));
//test to make sure sorted map's last key is cat 1 because it has the highest score.
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
index 2382375..809d785 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
@@ -41,8 +41,8 @@ public class ConllXPOSSampleStreamTest {
POSSample a = sampleStream.read();
- String aSentence[] = a.getSentence();
- String aTags[] = a.getTags();
+ String[] aSentence = a.getSentence();
+ String[] aTags = a.getTags();
assertEquals(22, aSentence.length);
assertEquals(22, aTags.length);
@@ -115,8 +115,8 @@ public class ConllXPOSSampleStreamTest {
POSSample b = sampleStream.read();
- String bSentence[] = b.getSentence();
- String bTags[] = b.getTags();
+ String[] bSentence = b.getSentence();
+ String[] bTags = b.getTags();
assertEquals(12, bSentence.length);
assertEquals(12, bTags.length);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
index fbda898..8fa31be 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
@@ -30,7 +30,7 @@ import opennlp.tools.util.ObjectStreamUtils;
public class ConstitParseSampleStreamTest {
- private String sample1Tokens[] = new String[]{
+ private String[] sample1Tokens = new String[]{
"L'",
"autonomie",
"de",
@@ -88,7 +88,7 @@ public class ConstitParseSampleStreamTest {
private static byte[] getSample1() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
- byte buffer[] = new byte[1024];
+ byte[] buffer = new byte[1024];
int length;
try (InputStream sampleIn =
ConstitParseSampleStreamTest.class.getResourceAsStream("sample1.xml")) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
index 92528e7..46d0440 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
@@ -33,7 +33,7 @@ public class BeamSearchTest {
private String[] outcomeSequence;
- IdentityFeatureGenerator(String outcomeSequence[]) {
+ IdentityFeatureGenerator(String[] outcomeSequence) {
this.outcomeSequence = outcomeSequence;
}
@@ -53,7 +53,7 @@ public class BeamSearchTest {
private double bestOutcomeProb = 0.8d;
private double otherOutcomeProb;
- IdentityModel(String outcomes[]) {
+ IdentityModel(String[] outcomes) {
this.outcomes = outcomes;
for (int i = 0; i < outcomes.length; i++) {
@@ -65,7 +65,7 @@ public class BeamSearchTest {
public double[] eval(String[] context) {
- double probs[] = new double[outcomes.length];
+ double[] probs = new double[outcomes.length];
for (int i = 0; i < probs.length; i++) {
if (outcomes[i].equals(context[0])) {
@@ -118,10 +118,10 @@ public class BeamSearchTest {
@Test
public void testBestSequenceZeroLengthInput() {
- String sequence[] = new String[0];
+ String[] sequence = new String[0];
BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
- String outcomes[] = new String[] {"1", "2", "3"};
+ String[] outcomes = new String[] {"1", "2", "3"};
MaxentModel model = new IdentityModel(outcomes);
BeamSearch<String> bs = new BeamSearch<>(3, model);
@@ -138,10 +138,10 @@ public class BeamSearchTest {
*/
@Test
public void testBestSequenceOneElementInput() {
- String sequence[] = {"1"};
+ String[] sequence = {"1"};
BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
- String outcomes[] = new String[] {"1", "2", "3"};
+ String[] outcomes = new String[] {"1", "2", "3"};
MaxentModel model = new IdentityModel(outcomes);
BeamSearch<String> bs = new BeamSearch<>(3, model);
@@ -160,10 +160,10 @@ public class BeamSearchTest {
*/
@Test
public void testBestSequence() {
- String sequence[] = {"1", "2", "3", "2", "1"};
+ String[] sequence = {"1", "2", "3", "2", "1"};
BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
- String outcomes[] = new String[] {"1", "2", "3"};
+ String[] outcomes = new String[] {"1", "2", "3"};
MaxentModel model = new IdentityModel(outcomes);
BeamSearch<String> bs = new BeamSearch<>(2, model);
@@ -186,10 +186,10 @@ public class BeamSearchTest {
*/
@Test
public void testBestSequenceWithValidator() {
- String sequence[] = {"1", "2", "3", "2", "1"};
+ String[] sequence = {"1", "2", "3", "2", "1"};
BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
- String outcomes[] = new String[] {"1", "2", "3"};
+ String[] outcomes = new String[] {"1", "2", "3"};
MaxentModel model = new IdentityModel(outcomes);
BeamSearch<String> bs = new BeamSearch<>(2, model, 0);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
index 7599551..08d0f97 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
@@ -59,8 +59,8 @@ public class DictionaryNameFinderTest {
public void testSingleTokeNameAtSentenceStart() {
String sentence = "Max a b c d";
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
- String tokens[] = tokenizer.tokenize(sentence);
- Span names[] = mNameFinder.find(tokens);
+ String[] tokens = tokenizer.tokenize(sentence);
+ Span[] names = mNameFinder.find(tokens);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 0 && names[0].getEnd() == 1);
}
@@ -69,8 +69,8 @@ public class DictionaryNameFinderTest {
public void testSingleTokeNameInsideSentence() {
String sentence = "a b Max c d";
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
- String tokens[] = tokenizer.tokenize(sentence);
- Span names[] = mNameFinder.find(tokens);
+ String[] tokens = tokenizer.tokenize(sentence);
+ Span[] names = mNameFinder.find(tokens);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 2 && names[0].getEnd() == 3);
}
@@ -80,40 +80,40 @@ public class DictionaryNameFinderTest {
String sentence = "a b c Max";
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
- String tokens[] = tokenizer.tokenize(sentence);
- Span names[] = mNameFinder.find(tokens);
+ String[] tokens = tokenizer.tokenize(sentence);
+ Span[] names = mNameFinder.find(tokens);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
}
@Test
public void testLastMatchingTokenNameIsChoosen() {
- String sentence[] = {"a", "b", "c", "Vanessa"};
- Span names[] = mNameFinder.find(sentence);
+ String[] sentence = {"a", "b", "c", "Vanessa"};
+ Span[] names = mNameFinder.find(sentence);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
}
@Test
public void testLongerTokenNameIsPreferred() {
- String sentence[] = {"a", "b", "c", "Vanessa", "Williams"};
- Span names[] = mNameFinder.find(sentence);
+ String[] sentence = {"a", "b", "c", "Vanessa", "Williams"};
+ Span[] names = mNameFinder.find(sentence);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
}
@Test
public void testCaseSensitivity() {
- String sentence[] = {"a", "b", "c", "vanessa", "williams"};
- Span names[] = mNameFinder.find(sentence);
+ String[] sentence = {"a", "b", "c", "vanessa", "williams"};
+ Span[] names = mNameFinder.find(sentence);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
}
@Test
public void testCaseLongerEntry() {
- String sentence[] = {"a", "b", "michael", "jordan"};
- Span names[] = mNameFinder.find(sentence);
+ String[] sentence = {"a", "b", "michael", "jordan"};
+ Span[] names = mNameFinder.find(sentence);
Assert.assertTrue(names.length == 1);
Assert.assertTrue(names[0].length() == 2);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index 494af62..c258d07 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -78,7 +78,7 @@ public class NameFinderMETest {
// now test if it can detect the sample sentences
- String sentence[] = {"Alisa",
+ String[] sentence = {"Alisa",
"appreciated",
"the",
"hint",
@@ -89,7 +89,7 @@ public class NameFinderMETest {
"traditional",
"meal."};
- Span names[] = nameFinder.find(sentence);
+ Span[] names = nameFinder.find(sentence);
Assert.assertEquals(1, names.length);
Assert.assertEquals(new Span(0, 1, DEFAULT), names[0]);
@@ -150,10 +150,10 @@ public class NameFinderMETest {
Assert.assertEquals("person", names2[0].getType());
Assert.assertEquals("person", names2[1].getType());
- String sentence[] = { "Alisa", "appreciated", "the", "hint", "and",
+ String[] sentence = { "Alisa", "appreciated", "the", "hint", "and",
"enjoyed", "a", "delicious", "traditional", "meal." };
- Span names[] = nameFinder.find(sentence);
+ Span[] names = nameFinder.find(sentence);
Assert.assertEquals(1, names.length);
Assert.assertEquals(new Span(0, 1, "person"), names[0]);
@@ -319,8 +319,8 @@ public class NameFinderMETest {
@Test
public void testDropOverlappingSpans() {
- Span spans[] = new Span[] {new Span(1, 10), new Span(1,11), new Span(1,11), new Span(5, 15)};
- Span remainingSpan[] = NameFinderME.dropOverlappingSpans(spans);
+ Span[] spans = new Span[] {new Span(1, 10), new Span(1,11), new Span(1,11), new Span(5, 15)};
+ Span[] remainingSpan = NameFinderME.dropOverlappingSpans(spans);
Assert.assertEquals(new Span(1, 11), remainingSpan[0]);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
index cf533f4..911f998 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
@@ -101,7 +101,7 @@ public class NameSampleTest {
@Test
public void testNameAtEnd() {
- String sentence[] = new String[] {
+ String[] sentence = new String[] {
"My",
"name",
"is",
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
index 0b3fe2a..c0f2fea 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
@@ -35,7 +35,7 @@ public class RegexNameFinderTest {
public void testFindSingleTokenPattern() {
Pattern testPattern = Pattern.compile("test");
- String sentence[] = new String[]{"a", "test", "b", "c"};
+ String[] sentence = new String[]{"a", "test", "b", "c"};
Pattern[] patterns = new Pattern[]{testPattern};
@@ -59,7 +59,7 @@ public class RegexNameFinderTest {
public void testFindTokenizdPattern() {
Pattern testPattern = Pattern.compile("[0-9]+ year");
- String sentence[] = new String[]{"a", "80", "year", "b", "c"};
+ String[] sentence = new String[]{"a", "80", "year", "b", "c"};
Pattern[] patterns = new Pattern[]{testPattern};
Map<String, Pattern[]> regexMap = new HashMap<>();
@@ -83,7 +83,7 @@ public class RegexNameFinderTest {
public void testFindMatchingPatternWithoutMatchingTokenBounds() {
Pattern testPattern = Pattern.compile("[0-8] year"); // does match "0 year"
- String sentence[] = new String[]{"a", "80", "year", "c"};
+ String[] sentence = new String[]{"a", "80", "year", "c"};
Pattern[] patterns = new Pattern[]{testPattern};
Map<String, Pattern[]> regexMap = new HashMap<>();
String type = "testtype";
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
index f8220c6..5d594ed 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
@@ -102,7 +102,7 @@ public class ParseTest {
public void testGetTagNodes() {
Parse p = Parse.parseParse(PARSE_STRING);
- Parse tags[] = p.getTagNodes();
+ Parse[] tags = p.getTagNodes();
for (Parse node : tags) {
Assert.assertTrue(node.isPosTag());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
index 2e616b5..51cae2c 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
@@ -63,7 +63,7 @@ public class POSTaggerMETest {
POSTagger tagger = new POSTaggerME(posModel);
- String tags[] = tagger.tag(new String[] {
+ String[] tags = tagger.tag(new String[] {
"The",
"driver",
"got",
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
index 2b9e984..01b96fc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
@@ -41,7 +41,7 @@ public class WordTagSampleStreamTest {
new WordTagSampleStream(new CollectionObjectStream<>(sampleString));
POSSample sample = stream.read();
- String words[] = sample.getSentence();
+ String[] words = sample.getSentence();
Assert.assertEquals("This", words[0]);
Assert.assertEquals("is", words[1]);
@@ -50,7 +50,7 @@ public class WordTagSampleStreamTest {
Assert.assertEquals("sentence", words[4]);
Assert.assertEquals(".", words[5]);
- String tags[] = sample.getTags();
+ String[] tags = sample.getTags();
Assert.assertEquals("x1", tags[0]);
Assert.assertEquals("x2", tags[1]);
Assert.assertEquals("x3", tags[2]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
index c700f3c..6e226bb 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
@@ -29,7 +29,7 @@ public class NewlineSentenceDetectorTest {
private static void testSentenceValues(String sentences) {
NewlineSentenceDetector sd = new NewlineSentenceDetector();
- String results[] = sd.sentDetect(sentences);
+ String[] results = sd.sentDetect(sentences);
Assert.assertEquals(3, results.length);
Assert.assertEquals("one.", results[0]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
index 78d41cc..43d5829 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
@@ -124,7 +124,7 @@ public class SentenceDetectorMETest {
Assert.assertEquals(sents[0],"This is a test");
// Test that sentPosDetect also works
- Span pos[] = sentDetect.sentPosDetect(sampleSentences2);
+ Span[] pos = sentDetect.sentPosDetect(sampleSentences2);
Assert.assertEquals(pos.length,2);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(probs.length,2);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
index fa04457..d6ad672 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
@@ -31,9 +31,9 @@ public class DictionaryDetokenizerTest {
@Test
public void testDetokenizer() {
- String tokens[] = new String[]{".", "!", "(", ")", "\"", "-"};
+ String[] tokens = new String[]{".", "!", "(", ")", "\"", "-"};
- Operation operations[] = new Operation[]{
+ Operation[] operations = new Operation[]{
Operation.MOVE_LEFT,
Operation.MOVE_LEFT,
Operation.MOVE_RIGHT,
@@ -44,7 +44,7 @@ public class DictionaryDetokenizerTest {
DetokenizationDictionary dict = new DetokenizationDictionary(tokens, operations);
Detokenizer detokenizer = new DictionaryDetokenizer(dict);
- DetokenizationOperation detokenizeOperations[] =
+ DetokenizationOperation[] detokenizeOperations =
detokenizer.detokenize(new String[]{"Simple", "test", ".", "co", "-", "worker"});
Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[0]);
@@ -71,7 +71,7 @@ public class DictionaryDetokenizerTest {
Detokenizer detokenizer = createLatinDetokenizer();
- String tokens[] = new String[]{"A", "test", ",", "(", "string", ")", "."};
+ String[] tokens = new String[]{"A", "test", ",", "(", "string", ")", "."};
String sentence = detokenizer.detokenize(tokens, null);
@@ -83,7 +83,7 @@ public class DictionaryDetokenizerTest {
Detokenizer detokenizer = createLatinDetokenizer();
- String tokens[] = new String[]{"A", "co", "-", "worker", "helped", "."};
+ String[] tokens = new String[]{"A", "co", "-", "worker", "helped", "."};
String sentence = detokenizer.detokenize(tokens, null);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
index 9ea82bb..84f5f10 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
@@ -45,7 +45,7 @@ public class TokenSampleStreamTest {
TokenSample tokenSample = sampleTokenStream.read();
- Span tokenSpans[] = tokenSample.getTokenSpans();
+ Span[] tokenSpans = tokenSample.getTokenSpans();
Assert.assertEquals(4, tokenSpans.length);
@@ -69,7 +69,7 @@ public class TokenSampleStreamTest {
TokenSample tokenSample = sampleTokenStream.read();
- Span tokenSpans[] = tokenSample.getTokenSpans();
+ Span[] tokenSpans = tokenSample.getTokenSpans();
Assert.assertEquals(4, tokenSpans.length);
@@ -101,7 +101,7 @@ public class TokenSampleStreamTest {
TokenSample tokenSample = sampleTokenStream.read();
- Span tokenSpans[] = tokenSample.getTokenSpans();
+ Span[] tokenSpans = tokenSample.getTokenSpans();
Assert.assertEquals(5, tokenSpans.length);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
index 81f3507..1c329bc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
@@ -45,7 +45,7 @@ public class TokenSampleTest {
Detokenizer detokenizer = DictionaryDetokenizerTest.createLatinDetokenizer();
- String tokens[] = new String[]{
+ String[] tokens = new String[]{
"start",
"(", // move right
")", // move left
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
index 9acb2e8..5a7a811 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
@@ -40,7 +40,7 @@ public class TokenizerMETest {
TokenizerME tokenizer = new TokenizerME(model);
- String tokens[] = tokenizer.tokenize("test,");
+ String[] tokens = tokenizer.tokenize("test,");
Assert.assertEquals(2, tokens.length);
Assert.assertEquals("test", tokens[0]);
@@ -52,7 +52,7 @@ public class TokenizerMETest {
TokenizerModel model = TokenizerTestUtil.createMaxentTokenModel();
TokenizerME tokenizer = new TokenizerME(model);
- String tokens[] = tokenizer.tokenize("Sounds like it's not properly thought through!");
+ String[] tokens = tokenizer.tokenize("Sounds like it's not properly thought through!");
Assert.assertEquals(9, tokens.length);
Assert.assertEquals("Sounds", tokens[0]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
index b57b0ec..09e2f44 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
@@ -29,7 +29,7 @@ public class FMeasureTest {
private static final double DELTA = 1.0E-9d;
- private Span gold[] = {
+ private Span[] gold = {
new Span(8, 9),
new Span(9, 10),
new Span(10, 12),
@@ -38,7 +38,7 @@ public class FMeasureTest {
new Span(15, 16)
};
- private Span predicted[] = {
+ private Span[] predicted = {
new Span(14, 15),
new Span(15, 16),
new Span(100, 120),
@@ -46,7 +46,7 @@ public class FMeasureTest {
new Span(220, 230)
};
- private Span predictedCompletelyDistinct[] = {
+ private Span[] predictedCompletelyDistinct = {
new Span(100, 120),
new Span(210, 220),
new Span(211, 220),
@@ -54,7 +54,7 @@ public class FMeasureTest {
new Span(220, 230)
};
- private Span goldToMerge[] = {
+ private Span[] goldToMerge = {
new Span(8, 9),
new Span(9, 10),
new Span(11, 11),
@@ -64,7 +64,7 @@ public class FMeasureTest {
new Span(18, 19),
};
- private Span predictedToMerge[] = {
+ private Span[] predictedToMerge = {
new Span(8, 9),
new Span(14, 15),
new Span(15, 16),
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
index 01b1d95..66471ff 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
@@ -29,12 +29,12 @@ import org.junit.Test;
*/
public class CachedFeatureGeneratorTest {
- private AdaptiveFeatureGenerator identityGenerator[] = new AdaptiveFeatureGenerator[] {
+ private AdaptiveFeatureGenerator[] identityGenerator = new AdaptiveFeatureGenerator[] {
new IdentityFeatureGenerator()};
- private String testSentence1[];
+ private String[] testSentence1;
- private String testSentence2[];
+ private String[] testSentence2;
private List<String> features;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
index c9da178..ca23589 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
@@ -33,7 +33,7 @@ public class PreviousMapFeatureGeneratorTest {
AdaptiveFeatureGenerator fg = new PreviousMapFeatureGenerator();
- String sentence[] = new String[] {"a", "b", "c"};
+ String[] sentence = new String[] {"a", "b", "c"};
List<String> features = new ArrayList<>();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
index a60ac9b..2edaba7 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
@@ -150,7 +150,7 @@ public final class Chunker extends CasAnnotator_ImplBase {
context, mTokenType, UimaUtil.POS_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
}
- private void addChunkAnnotation(CAS tcas, AnnotationFS tokenAnnotations[],
+ private void addChunkAnnotation(CAS tcas, AnnotationFS[] tokenAnnotations,
String tag, int start, int end) {
AnnotationFS chunk = tcas.createAnnotation(mChunkType,
tokenAnnotations[start].getBegin(), tokenAnnotations[end - 1].getEnd());
@@ -167,9 +167,9 @@ public final class Chunker extends CasAnnotator_ImplBase {
FSIndex<AnnotationFS> tokenAnnotationIndex = tcas.getAnnotationIndex(mTokenType);
- String tokens[] = new String[tokenAnnotationIndex.size()];
- String pos[] = new String[tokenAnnotationIndex.size()];
- AnnotationFS tokenAnnotations[] = new AnnotationFS[tokenAnnotationIndex
+ String[] tokens = new String[tokenAnnotationIndex.size()];
+ String[] pos = new String[tokenAnnotationIndex.size()];
+ AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex
.size()];
int index = 0;
@@ -184,7 +184,7 @@ public final class Chunker extends CasAnnotator_ImplBase {
mPosFeature);
}
- String result[] = mChunker.chunk(tokens, pos);
+ String[] result = mChunker.chunk(tokens, pos);
int start = -1;
int end = -1;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index 6d76c8f..db9c075 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -82,7 +82,7 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
public void process(CAS cas) {
- double result[];
+ double[] result;
if (mTokenType != null) {
// TODO:
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
index 793da86..67efb55 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
@@ -134,7 +134,7 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
}
}
- protected void postProcessAnnotations(Span detectedNames[],
+ protected void postProcessAnnotations(Span[] detectedNames,
AnnotationFS[] nameAnnotations) {
}
@@ -186,7 +186,7 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
Span[] names = find(cas,
sentenceTokenList.toArray(new String[sentenceTokenList.size()]));
- AnnotationFS nameAnnotations[] = new AnnotationFS[names.length];
+ AnnotationFS[] nameAnnotations = new AnnotationFS[names.length];
for (int i = 0; i < names.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
index e89d2d6..2a844cf 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
@@ -154,9 +154,9 @@ public final class NameFinder extends AbstractNameFinder {
protected Span[] find(CAS cas, String[] tokens) {
- Span names[] = mNameFinder.find(tokens);
+ Span[] names = mNameFinder.find(tokens);
- double probs[] = mNameFinder.probs();
+ double[] probs = mNameFinder.probs();
for (double prob : probs) {
documentConfidence.add(prob);
@@ -165,7 +165,7 @@ public final class NameFinder extends AbstractNameFinder {
return names;
}
- protected void postProcessAnnotations(Span detectedNames[],
+ protected void postProcessAnnotations(Span[] detectedNames,
AnnotationFS[] nameAnnotations) {
if (probabilityFeature != null) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
index 8e8920f..e3544b1 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
@@ -36,7 +36,7 @@ public final class NumberUtil {
public static boolean isLanguageSupported(String languageCode) {
Locale locale = new Locale(languageCode);
- Locale possibleLocales[] = NumberFormat.getAvailableLocales();
+ Locale[] possibleLocales = NumberFormat.getAvailableLocales();
boolean isLocaleSupported = false;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
index 01c25c3..d147259 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
@@ -199,8 +199,8 @@ public class Parser extends CasAnnotator_ImplBase {
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) {
- Parse parseChildren[] = parse.getChildren();
- AnnotationFS parseChildAnnotations[] = new AnnotationFS[parseChildren.length];
+ Parse[] parseChildren = parse.getChildren();
+ AnnotationFS[] parseChildAnnotations = new AnnotationFS[parseChildren.length];
// do this for all children
for (int i = 0; i < parseChildren.length; i++) {
@@ -244,13 +244,13 @@ public class Parser extends CasAnnotator_ImplBase {
* @param sentence
* @param tokens
*/
- public ParseConverter(String sentence, Span tokens[]) {
+ public ParseConverter(String sentence, Span[] tokens) {
mSentence = sentence;
StringBuilder sentenceStringBuilder = new StringBuilder();
- String tokenList[] = new String[tokens.length];
+ String[] tokenList = new String[tokens.length];
for (int i = 0; i < tokens.length; i++) {
String tokenString = tokens[i].getCoveredText(sentence).toString();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
index d26c64a..db0f66d 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
@@ -180,7 +180,7 @@ public final class POSTagger extends CasAnnotator_ImplBase {
final List<String> posTags = Arrays.asList(this.posTagger.tag(
sentenceTokenList.toArray(new String[sentenceTokenList.size()])));
- double posProbabilities[] = null;
+ double[] posProbabilities = null;
if (this.probabilityFeature != null) {
posProbabilities = this.posTagger.probs();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
index a905199..ab9095a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
@@ -87,7 +87,7 @@ public abstract class AbstractSentenceDetector extends CasAnnotator_ImplBase {
protected abstract Span[] detectSentences(String text);
- protected void postProcessAnnotations(AnnotationFS sentences[]) {
+ protected void postProcessAnnotations(AnnotationFS[] sentences) {
}
@Override
@@ -106,7 +106,7 @@ public abstract class AbstractSentenceDetector extends CasAnnotator_ImplBase {
Span[] sentPositions = detectSentences(text);
- AnnotationFS sentences[] = new AnnotationFS[sentPositions.length];
+ AnnotationFS[] sentences = new AnnotationFS[sentPositions.length];
for (int i = 0; i < sentPositions.length; i++) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
index fdb434d..acb5c6b 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
@@ -114,10 +114,10 @@ public final class SentenceDetector extends AbstractSentenceDetector {
}
@Override
- protected void postProcessAnnotations(AnnotationFS sentences[]) {
+ protected void postProcessAnnotations(AnnotationFS[] sentences) {
if (probabilityFeature != null) {
- double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
+ double[] sentenceProbabilities = sentenceDetector.getSentenceProbabilities();
for (int i = 0; i < sentences.length; i++) {
sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
index ee02dc9..b1f7abc 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
@@ -91,8 +91,8 @@ public abstract class AbstractTokenizer extends CasAnnotator_ImplBase {
UimaUtil.TOKEN_TYPE_PARAMETER);
}
- protected void postProcessAnnotations(Span tokens[],
- AnnotationFS tokenAnnotations[]) {
+ protected void postProcessAnnotations(Span[] tokens,
+ AnnotationFS[] tokenAnnotations) {
}
protected abstract Span[] tokenize(CAS cas, AnnotationFS sentence);
@@ -107,13 +107,13 @@ public abstract class AbstractTokenizer extends CasAnnotator_ImplBase {
UimaUtil.removeAnnotations(cas, sentence, tokenType);
}
- Span tokenSpans[] = tokenize(cas, sentence);
+ Span[] tokenSpans = tokenize(cas, sentence);
int sentenceOffset = sentence.getBegin();
StringBuilder tokeninzedSentenceLog = new StringBuilder();
- AnnotationFS tokenAnnotations[] = new AnnotationFS[tokenSpans.length];
+ AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenSpans.length];
for (int i = 0; i < tokenSpans.length; i++) {
tokenAnnotations[i] = cas
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
index 9f72f92..b558241 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
@@ -123,7 +123,7 @@ public final class Tokenizer extends AbstractTokenizer {
AnnotationFS[] tokenAnnotations) {
// if interest
if (probabilityFeature != null) {
- double tokenProbabilties[] = tokenizer.getTokenProbabilities();
+ double[] tokenProbabilties = tokenizer.getTokenProbabilities();
for (int i = 0; i < tokenAnnotations.length; i++) {
tokenAnnotations[i].setDoubleValue(probabilityFeature,
http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbd6d3fc/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
index 338dfec..ecec498 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
@@ -61,7 +61,7 @@ final public class OpennlpUtil {
try (InputStream in = new FileInputStream(inFile)) {
- byte buffer[] = new byte[1024];
+ byte[] buffer = new byte[1024];
int len;
while ((len = in.read(buffer)) > 0) {
bytes.write(buffer, 0, len);
[19/24] opennlp git commit: OPENNLP-998: Fixing Maven build on MacOS
Posted by co...@apache.org.
OPENNLP-998: Fixing Maven build on MacOS
This closes #136
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/347babe1
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/347babe1
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/347babe1
Branch: refs/heads/OPENNLP-778
Commit: 347babe193172c6eb96961ea706d1de249f3f931
Parents: 8a3b3b5
Author: Madhav Sharan <go...@gmail.com>
Authored: Sat Mar 4 11:16:25 2017 -0800
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Mon Mar 6 20:27:01 2017 +0100
----------------------------------------------------------------------
opennlp-distr/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/347babe1/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index 1ce102d..3f838cd 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -72,7 +72,7 @@
many file have more than 100 chars.
Right now only javadoc files are too long.
-->
- <tarLongFileMode>gnu</tarLongFileMode>
+ <tarLongFileMode>posix</tarLongFileMode>
<finalName>apache-opennlp-${project.version}</finalName>
</configuration>
[10/24] opennlp git commit: OPENNLP-992: Distribution package should
include example parameters file
Posted by co...@apache.org.
OPENNLP-992: Distribution package should include example parameters file
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/413c7fc0
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/413c7fc0
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/413c7fc0
Branch: refs/heads/OPENNLP-778
Commit: 413c7fc0768ed543f1860df518287d7df54f4328
Parents: 5be5b52
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 21 13:47:29 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Tue Feb 21 13:47:29 2017 -0500
----------------------------------------------------------------------
opennlp-distr/src/main/assembly/bin.xml | 7 +++++++
1 file changed, 7 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/413c7fc0/opennlp-distr/src/main/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/src/main/assembly/bin.xml b/opennlp-distr/src/main/assembly/bin.xml
index 43d95f4..b1e27c8 100644
--- a/opennlp-distr/src/main/assembly/bin.xml
+++ b/opennlp-distr/src/main/assembly/bin.xml
@@ -87,6 +87,13 @@
<directoryMode>755</directoryMode>
<outputDirectory>bin</outputDirectory>
</fileSet>
+
+ <fileSet>
+ <directory>../opennlp-tools/lang</directory>
+ <fileMode>644</fileMode>
+ <directoryMode>755</directoryMode>
+ <outputDirectory>lang</outputDirectory>
+ </fileSet>
<fileSet>
<directory>../opennlp-docs/target/docbkx/html</directory>
[18/24] opennlp git commit: OPENNLP-904 Harmonize lemmatizer API and
function to get multiple lemmas
Posted by co...@apache.org.
OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas
OPENNLP-904 add minor correction after PR comment
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8a3b3b53
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8a3b3b53
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8a3b3b53
Branch: refs/heads/OPENNLP-778
Commit: 8a3b3b537a30b14c4ffb5eb32ffa41d5027bddad
Parents: ebf1081
Author: Rodrigo Agerri <ra...@apache.org>
Authored: Fri Feb 3 16:00:38 2017 +0100
Committer: Rodrigo Agerri <ra...@apache.org>
Committed: Fri Mar 3 09:16:20 2017 +0100
----------------------------------------------------------------------
.../cmdline/lemmatizer/LemmatizerMETool.java | 4 +-
.../tools/lemmatizer/DictionaryLemmatizer.java | 70 ++++++++++++++------
.../lemmatizer/LemmaSampleEventStream.java | 2 +-
.../tools/lemmatizer/LemmaSampleStream.java | 4 +-
.../opennlp/tools/lemmatizer/Lemmatizer.java | 16 ++++-
.../opennlp/tools/lemmatizer/LemmatizerME.java | 64 ++++++++++++++++--
.../tools/lemmatizer/DummyLemmatizer.java | 7 ++
.../tools/lemmatizer/LemmatizerMETest.java | 3 +-
8 files changed, 136 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index e4e47b5..90ba95d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -72,10 +72,8 @@ public class LemmatizerMETool extends BasicCmdLineTool {
continue;
}
- String[] preds = lemmatizer.lemmatize(posSample.getSentence(),
+ String[] lemmas = lemmatizer.lemmatize(posSample.getSentence(),
posSample.getTags());
- String[] lemmas = lemmatizer.decodeLemmas(posSample.getSentence(),
- preds);
System.out.println(new LemmaSample(posSample.getSentence(),
posSample.getTags(), lemmas).toString());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index b1b04a1..9f0b0b0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -37,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
/**
* The hashmap containing the dictionary.
*/
- private final Map<List<String>, String> dictMap;
+ private final Map<List<String>, List<String>> dictMap;
/**
* Construct a hashmap from the input tab separated dictionary.
@@ -47,26 +47,24 @@ public class DictionaryLemmatizer implements Lemmatizer {
* @param dictionary
* the input dictionary via inputstream
*/
- public DictionaryLemmatizer(final InputStream dictionary) {
+ public DictionaryLemmatizer(final InputStream dictionary) throws IOException {
this.dictMap = new HashMap<>();
- final BufferedReader breader = new BufferedReader(new InputStreamReader(dictionary));
+ final BufferedReader breader = new BufferedReader(
+ new InputStreamReader(dictionary));
String line;
- try {
- while ((line = breader.readLine()) != null) {
- final String[] elems = line.split("\t");
- this.dictMap.put(Arrays.asList(elems[0], elems[1]), elems[2]);
- }
- } catch (final IOException e) {
- e.printStackTrace();
+ while ((line = breader.readLine()) != null) {
+ final String[] elems = line.split("\t");
+ this.dictMap.put(Arrays.asList(elems[0], elems[1]), Arrays.asList(elems[2]));
}
}
+
/**
* Get the Map containing the dictionary.
*
* @return dictMap the Map
*/
- public Map<List<String>, String> getDictMap() {
+ public Map<List<String>, List<String>> getDictMap() {
return this.dictMap;
}
@@ -85,31 +83,65 @@ public class DictionaryLemmatizer implements Lemmatizer {
return keys;
}
+
public String[] lemmatize(final String[] tokens, final String[] postags) {
List<String> lemmas = new ArrayList<>();
for (int i = 0; i < tokens.length; i++) {
- lemmas.add(this.apply(tokens[i], postags[i]));
+ lemmas.add(this.lemmatize(tokens[i], postags[i]));
}
return lemmas.toArray(new String[lemmas.size()]);
}
+ public List<List<String>> lemmatize(final List<String> tokens, final List<String> posTags) {
+ List<List<String>> allLemmas = new ArrayList<>();
+ for (int i = 0; i < tokens.size(); i++) {
+ allLemmas.add(this.getAllLemmas(tokens.get(i), posTags.get(i)));
+ }
+ return allLemmas;
+ }
+
/**
* Lookup lemma in a dictionary. Outputs "O" if not found.
- * @param word the token
- * @param postag the postag
+ *
+ * @param word
+ * the token
+ * @param postag
+ * the postag
* @return the lemma
*/
- public String apply(final String word, final String postag) {
+ private String lemmatize(final String word, final String postag) {
String lemma;
final List<String> keys = this.getDictKeys(word, postag);
// lookup lemma as value of the map
- final String keyValue = this.dictMap.get(keys);
- if (keyValue != null) {
- lemma = keyValue;
+ final List<String> keyValues = this.dictMap.get(keys);
+ if (!keyValues.isEmpty()) {
+ lemma = keyValues.get(0);
} else {
lemma = "O";
}
return lemma;
}
-}
+ /**
+ * Lookup every lemma for a word,pos tag in a dictionary. Outputs "O" if not
+ * found.
+ *
+ * @param word
+ * the token
+ * @param postag
+ * the postag
+ * @return every lemma
+ */
+ private List<String> getAllLemmas(final String word, final String postag) {
+ List<String> lemmasList = new ArrayList<>();
+ final List<String> keys = this.getDictKeys(word, postag);
+ // lookup lemma as value of the map
+ final List<String> keyValues = this.dictMap.get(keys);
+ if (!keyValues.isEmpty()) {
+ lemmasList.addAll(keyValues);
+ } else {
+ lemmasList.add("O");
+ }
+ return lemmasList;
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
index fc1a558..a8d71e8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
@@ -49,7 +49,7 @@ public class LemmaSampleEventStream extends AbstractEventStream<LemmaSample> {
List<Event> events = new ArrayList<>();
String[] toksArray = sample.getTokens();
String[] tagsArray = sample.getTags();
- String[] lemmasArray = sample.getLemmas();
+ String[] lemmasArray = LemmatizerME.encodeLemmas(toksArray,sample.getLemmas());
for (int ei = 0, el = sample.getTokens().length; ei < el; ei++) {
events.add(new Event(lemmasArray[ei],
contextGenerator.getContext(ei,toksArray,tagsArray,lemmasArray)));
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
index 0a133c3..9c661a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
@@ -23,7 +23,6 @@ import java.util.List;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.StringUtil;
/**
@@ -51,8 +50,7 @@ public class LemmaSampleStream extends FilterObjectStream<String, LemmaSample> {
else {
toks.add(parts[0]);
tags.add(parts[1]);
- String ses = StringUtil.getShortestEditScript(parts[0], parts[2]);
- preds.add(ses);
+ preds.add(parts[2]);
}
}
if (toks.size() > 0) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index f21f9e3..933eec1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -17,19 +17,31 @@
package opennlp.tools.lemmatizer;
+import java.util.List;
+
/**
* The interface for lemmatizers.
*/
public interface Lemmatizer {
/**
- * Generates lemma tags for the word and postag returning the result in an array.
+ * Generates lemmas for the word and postag returning the result in an array.
*
* @param toks an array of the tokens
* @param tags an array of the pos tags
*
- * @return an array of lemma classes for each token in the sequence.
+ * @return an array of possible lemmas for each token in the sequence.
*/
String[] lemmatize(String[] toks, String[] tags);
+ /**
+ * Generates a lemma tags for the word and postag returning the result in a list
+ * of every possible lemma for each token and postag.
+ *
+ * @param toks an array of the tokens
+ * @param tags an array of the pos tags
+ * @return a list of every possible lemma for each token in the sequence.
+ */
+ List<List<String>> lemmatize(List<String> toks, List<String> tags);
+
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
index 4855fda..2b8122f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
@@ -19,6 +19,7 @@ package opennlp.tools.lemmatizer;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -47,6 +48,7 @@ import opennlp.tools.util.TrainingParameters;
*/
public class LemmatizerME implements Lemmatizer {
+ public static final int LEMMA_NUMBER = 29;
public static final int DEFAULT_BEAM_SIZE = 3;
protected int beamSize;
private Sequence bestSequence;
@@ -86,9 +88,52 @@ public class LemmatizerME implements Lemmatizer {
}
public String[] lemmatize(String[] toks, String[] tags) {
+ String[] ses = predictSES(toks, tags);
+ String[] lemmas = decodeLemmas(toks, ses);
+ return lemmas;
+ }
+
+ @Override public List<List<String>> lemmatize(List<String> toks,
+ List<String> tags) {
+ String[] tokens = toks.toArray(new String[toks.size()]);
+ String[] posTags = tags.toArray(new String[tags.size()]);
+ String[][] allLemmas = predictLemmas(LEMMA_NUMBER, tokens, posTags);
+ List<List<String>> predictedLemmas = new ArrayList<>();
+ for (int i = 0; i < allLemmas.length; i++) {
+ predictedLemmas.add(Arrays.asList(allLemmas[i]));
+ }
+ return predictedLemmas;
+ }
+
+ /**
+ * Predict Short Edit Script (automatically induced lemma class).
+ * @param toks the array of tokens
+ * @param tags the array of pos tags
+ * @return an array containing the lemma classes
+ */
+ public String[] predictSES(String[] toks, String[] tags) {
bestSequence = model.bestSequence(toks, new Object[] {tags}, contextGenerator, sequenceValidator);
- List<String> c = bestSequence.getOutcomes();
- return c.toArray(new String[c.size()]);
+ List<String> ses = bestSequence.getOutcomes();
+ return ses.toArray(new String[ses.size()]);
+ }
+
+ /**
+ * Predict all possible lemmas (using a default upper bound).
+ * @param numLemmas the default number of lemmas
+ * @param toks the tokens
+ * @param tags the postags
+ * @return a double array containing all possible lemmas for each token and postag pair
+ */
+ public String[][] predictLemmas(int numLemmas, String[] toks, String[] tags) {
+ Sequence[] bestSequences = model.bestSequences(numLemmas, toks, new Object[] {tags},
+ contextGenerator, sequenceValidator);
+ String[][] allLemmas = new String[bestSequences.length][];
+ for (int i = 0; i < allLemmas.length; i++) {
+ List<String> ses = bestSequences[i].getOutcomes();
+ String[] sesArray = ses.toArray(new String[ses.size()]);
+ allLemmas[i] = decodeLemmas(toks,sesArray);
+ }
+ return allLemmas;
}
/**
@@ -97,11 +142,10 @@ public class LemmatizerME implements Lemmatizer {
* @param preds the predicted lemma classes
* @return the array of decoded lemmas
*/
- public String[] decodeLemmas(String[] toks, String[] preds) {
+ public static String[] decodeLemmas(String[] toks, String[] preds) {
List<String> lemmas = new ArrayList<>();
for (int i = 0; i < toks.length; i++) {
String lemma = StringUtil.decodeShortestEditScript(toks[i].toLowerCase(), preds[i]);
- //System.err.println("-> DEBUG: " + toks[i].toLowerCase() + " " + preds[i] + " " + lemma);
if (lemma.length() == 0) {
lemma = "_";
}
@@ -110,6 +154,18 @@ public class LemmatizerME implements Lemmatizer {
return lemmas.toArray(new String[lemmas.size()]);
}
+ public static String[] encodeLemmas(String[] toks, String[] lemmas) {
+ List<String> sesList = new ArrayList<>();
+ for (int i = 0; i < toks.length; i++) {
+ String ses = StringUtil.getShortestEditScript(toks[i], lemmas[i]);
+ if (ses.length() == 0) {
+ ses = "_";
+ }
+ sesList.add(ses);
+ }
+ return sesList.toArray(new String[sesList.size()]);
+ }
+
public Sequence[] topKSequences(String[] sentence, String[] tags) {
return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
new Object[] { tags }, contextGenerator, sequenceValidator);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
index 489ba38..dcfc883 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
@@ -19,6 +19,7 @@ package opennlp.tools.lemmatizer;
import java.io.IOException;
import java.util.Arrays;
+import java.util.List;
/**
* This dummy lemmatizer implementation simulates a LemmatizerME. The file has
@@ -56,4 +57,10 @@ public class DummyLemmatizer implements Lemmatizer {
}
}
+ @Override
+ public List<List<String>> lemmatize(List<String> toks,
+ List<String> tags) {
+ return null;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8a3b3b53/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
index 76b4cd5..97dcc3c 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
@@ -82,8 +82,7 @@ public class LemmatizerMETest {
@Test
public void testLemmasAsArray() throws Exception {
- String[] preds = lemmatizer.lemmatize(tokens, postags);
- String[] lemmas = lemmatizer.decodeLemmas(tokens, preds);
+ String[] lemmas = lemmatizer.lemmatize(tokens, postags);
Assert.assertArrayEquals(expect, lemmas);
}
[06/24] opennlp git commit: OPENNLP-176: Switch language codes to
ISO-639-3
Posted by co...@apache.org.
OPENNLP-176: Switch language codes to ISO-639-3
This closes #114
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/33871c3d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/33871c3d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/33871c3d
Branch: refs/heads/OPENNLP-778
Commit: 33871c3db6d870f460636f6205bc23f8d48fdd4d
Parents: bbd6d3f
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Jan 29 11:06:08 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Feb 17 11:01:51 2017 +0100
----------------------------------------------------------------------
.../cmdline/namefind/CensusDictionaryCreatorTool.java | 2 +-
.../opennlp/tools/cmdline/parser/ParserTrainerTool.java | 4 ++--
.../tools/formats/AbstractSampleStreamFactory.java | 2 +-
.../tools/formats/Conll03NameSampleStreamFactory.java | 6 +++---
.../main/java/opennlp/tools/sentdetect/lang/Factory.java | 10 +++++-----
5 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6042510..f9bf5e0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool {
interface Parameters {
@ParameterDescription(valueName = "code")
- @OptionalParameter(defaultValue = "en")
+ @OptionalParameter(defaultValue = "eng")
String getLang();
@ParameterDescription(valueName = "charsetName")
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 3a8dd5a..2709fd5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT
params.getHeadRulesSerializerImpl());
}
else {
- if ("en".equals(params.getLang())) {
+ if ("en".equals(params.getLang()) || "eng".equals(params.getLang())) {
headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
}
- else if ("es".equals(params.getLang())) {
+ else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) {
headRulesSerializer = new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules.HeadRulesSerializer();
}
else {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
index 6a7690e..33d0f95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
@@ -34,7 +34,7 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact
}
public String getLang() {
- return "en";
+ return "eng";
}
@SuppressWarnings({"unchecked"})
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
index 878565f..599d48a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.ObjectStream;
public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
interface Parameters extends BasicFormatParams {
- @ParameterDescription(valueName = "en|de")
+ @ParameterDescription(valueName = "eng|deu")
String getLang();
@ParameterDescription(valueName = "per,loc,org,misc")
@@ -54,11 +54,11 @@ public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<
// TODO: support the other languages with this CoNLL.
LANGUAGE lang;
- if ("en".equals(params.getLang())) {
+ if ("eng".equals(params.getLang())) {
lang = LANGUAGE.EN;
language = params.getLang();
}
- else if ("de".equals(params.getLang())) {
+ else if ("deu".equals(params.getLang())) {
lang = LANGUAGE.DE;
language = params.getLang();
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 28b515b..4a34229 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -49,9 +49,9 @@ public class Factory {
public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
- if ("th".equals(languageCode)) {
+ if ("th".equals(languageCode) || "tha".equals(languageCode)) {
return new SentenceContextGenerator();
- } else if ("pt".equals(languageCode)) {
+ } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
}
@@ -68,11 +68,11 @@ public class Factory {
}
public char[] getEOSCharacters(String languageCode) {
- if ("th".equals(languageCode)) {
+ if ("th".equals(languageCode) || "tha".equals(languageCode)) {
return thEosCharacters;
- } else if ("pt".equals(languageCode)) {
+ } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
return ptEosCharacters;
- } else if ("jp".equals(languageCode)) {
+ } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) {
return jpEosCharacters;
}
[12/24] opennlp git commit: closes apache/opennlp#132 *Already Merged*
Posted by co...@apache.org.
closes apache/opennlp#132 *Already Merged*
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f2f987d4
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f2f987d4
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f2f987d4
Branch: refs/heads/OPENNLP-778
Commit: f2f987d47e04f2bd498cd8943b3f52d537d11824
Parents: decaab5
Author: smarthi <sm...@apache.org>
Authored: Thu Feb 23 07:49:11 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Thu Feb 23 07:49:11 2017 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[13/24] opennlp git commit: OPENNLP-996: Remove heap memory settings
from opennlp-tools
Posted by co...@apache.org.
OPENNLP-996: Remove heap memory settings from opennlp-tools
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/61901ab5
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/61901ab5
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/61901ab5
Branch: refs/heads/OPENNLP-778
Commit: 61901ab58d011d6e60d483129fc87a644c868803
Parents: f2f987d
Author: smarthi <sm...@apache.org>
Authored: Sun Feb 26 12:56:04 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Sun Feb 26 12:56:04 2017 -0500
----------------------------------------------------------------------
opennlp-tools/pom.xml | 5 -----
1 file changed, 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/61901ab5/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index d2630c9..663e903 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -33,10 +33,6 @@
<packaging>bundle</packaging>
<name>Apache OpenNLP Tools</name>
- <properties>
- <argLine>-Xmx4096m</argLine>
- </properties>
-
<dependencies>
<dependency>
<groupId>org.osgi</groupId>
@@ -81,7 +77,6 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
- <argLine>@{argLine}</argLine>
<excludes>
<exclude>/opennlp/tools/eval/**/*</exclude>
</excludes>
[07/24] opennlp git commit: OPENNLP-982: Allow loading of 1.5.x models
Posted by co...@apache.org.
OPENNLP-982: Allow loading of 1.5.x models
This closes #129
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8e610f18
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8e610f18
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8e610f18
Branch: refs/heads/OPENNLP-778
Commit: 8e610f183caf1829337559e63a68c6a7d4f1ba05
Parents: 33871c3
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Feb 17 12:32:11 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Feb 17 13:43:39 2017 +0100
----------------------------------------------------------------------
.../src/main/java/opennlp/tools/util/model/BaseModel.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/8e610f18/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
index 20acd9d..f70fb03 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
@@ -416,7 +416,7 @@ public abstract class BaseModel implements ArtifactProvider, Serializable {
// Major and minor version must match, revision might be
// this check allows for the use of models of n minor release behind current minor release
if (Version.currentVersion().getMajor() != version.getMajor() ||
- Version.currentVersion().getMinor() - 2 > version.getMinor()) {
+ Version.currentVersion().getMinor() - 3 > version.getMinor()) {
throw new InvalidFormatException("Model version " + version + " is not supported by this ("
+ Version.currentVersion() + ") version of OpenNLP!");
}
[03/24] opennlp git commit: OPENNLP-983: Make suffix/prefix length
configurable
Posted by co...@apache.org.
OPENNLP-983: Make suffix/prefix length configurable
This closes #121
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b7d3abce
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b7d3abce
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b7d3abce
Branch: refs/heads/OPENNLP-778
Commit: b7d3abce569b5a4bf0ae39b24c6ac9920032db01
Parents: 929595d
Author: jzonthemtn <je...@mtnfog.com>
Authored: Mon Feb 13 07:57:21 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Feb 16 10:04:08 2017 +0100
----------------------------------------------------------------------
.../tools/util/featuregen/GeneratorFactory.java | 22 ++++-
.../util/featuregen/PrefixFeatureGenerator.java | 32 +++++--
.../util/featuregen/SuffixFeatureGenerator.java | 33 +++++--
.../featuregen/PrefixFeatureGeneratorTest.java | 92 ++++++++++++++++++++
.../featuregen/SuffixFeatureGeneratorTest.java | 92 ++++++++++++++++++++
5 files changed, 251 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b7d3abce/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index fa97f43..ef08cfb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -555,7 +555,16 @@ public class GeneratorFactory {
public AdaptiveFeatureGenerator create(Element generatorElement,
FeatureGeneratorResourceProvider resourceManager) {
- return new PrefixFeatureGenerator();
+
+ String attribute = generatorElement.getAttribute("length");
+
+ int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH;
+
+ if (!Objects.equals(attribute, "")) {
+ prefixLength = Integer.parseInt(attribute);
+ }
+
+ return new PrefixFeatureGenerator(prefixLength);
}
static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
@@ -570,7 +579,16 @@ public class GeneratorFactory {
public AdaptiveFeatureGenerator create(Element generatorElement,
FeatureGeneratorResourceProvider resourceManager) {
- return new SuffixFeatureGenerator();
+
+ String attribute = generatorElement.getAttribute("length");
+
+ int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH;
+
+ if (!Objects.equals(attribute, "")) {
+ suffixLength = Integer.parseInt(attribute);
+ }
+
+ return new SuffixFeatureGenerator(suffixLength);
}
static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b7d3abce/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 8cdd48f..04fcd15 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -21,21 +21,35 @@ import java.util.List;
public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
- private static final int PREFIX_LENGTH = 4;
-
- private static String[] getPrefixes(String lex) {
- String[] prefs = new String[PREFIX_LENGTH];
- for (int li = 0; li < PREFIX_LENGTH; li++) {
- prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
- }
- return prefs;
+ static final int DEFAULT_MAX_LENGTH = 4;
+
+ private final int prefixLength;
+
+ public PrefixFeatureGenerator() {
+ prefixLength = DEFAULT_MAX_LENGTH;
+ }
+
+ public PrefixFeatureGenerator(int prefixLength) {
+ this.prefixLength = prefixLength;
}
+ @Override
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
- String[] prefs = PrefixFeatureGenerator.getPrefixes(tokens[index]);
+ String[] prefs = getPrefixes(tokens[index]);
for (String pref : prefs) {
features.add("pre=" + pref);
}
}
+
+ private String[] getPrefixes(String lex) {
+
+ int prefixes = Math.min(prefixLength, lex.length());
+
+ String[] prefs = new String[prefixes];
+ for (int li = 0; li < prefixes; li++) {
+ prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
+ }
+ return prefs;
+ }
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b7d3abce/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
index a17fd47..c626fd9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
@@ -21,21 +21,36 @@ import java.util.List;
public class SuffixFeatureGenerator implements AdaptiveFeatureGenerator {
- private static final int SUFFIX_LENGTH = 4;
-
- public static String[] getSuffixes(String lex) {
- String[] suffs = new String[SUFFIX_LENGTH];
- for (int li = 0; li < SUFFIX_LENGTH; li++) {
- suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
- }
- return suffs;
+ static final int DEFAULT_MAX_LENGTH = 4;
+
+ private final int suffixLength;
+
+ public SuffixFeatureGenerator() {
+ suffixLength = DEFAULT_MAX_LENGTH;
+ }
+
+ public SuffixFeatureGenerator(int suffixLength) {
+ this.suffixLength = suffixLength;
}
+ @Override
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
- String[] suffs = SuffixFeatureGenerator.getSuffixes(tokens[index]);
+ String[] suffs = getSuffixes(tokens[index]);
for (String suff : suffs) {
features.add("suf=" + suff);
}
}
+
+ private String[] getSuffixes(String lex) {
+
+ int suffixes = Math.min(suffixLength, lex.length());
+
+ String[] suffs = new String[suffixes];
+ for (int li = 0; li < suffixes; li++) {
+ suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
+ }
+ return suffs;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b7d3abce/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
new file mode 100644
index 0000000..5639174
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PrefixFeatureGeneratorTest {
+
+ private List<String> features;
+
+ @Before
+ public void setUp() throws Exception {
+ features = new ArrayList<>();
+ }
+
+ @Test
+ public void lengthTest1() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 0;
+ int suffixLength = 2;
+
+ AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(2, features.size());
+ Assert.assertEquals("pre=T", features.get(0));
+ Assert.assertEquals("pre=Th", features.get(1));
+
+ }
+
+ @Test
+ public void lengthTest2() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 3;
+ int suffixLength = 5;
+
+ AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(5, features.size());
+ Assert.assertEquals("pre=e", features.get(0));
+ Assert.assertEquals("pre=ex", features.get(1));
+ Assert.assertEquals("pre=exa", features.get(2));
+ Assert.assertEquals("pre=exam", features.get(3));
+ Assert.assertEquals("pre=examp", features.get(4));
+
+ }
+
+ @Test
+ public void lengthTest3() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 1;
+ int suffixLength = 5;
+
+ AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(2, features.size());
+ Assert.assertEquals("pre=i", features.get(0));
+ Assert.assertEquals("pre=is", features.get(1));
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b7d3abce/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
new file mode 100644
index 0000000..fcb23a6
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SuffixFeatureGeneratorTest {
+
+ private List<String> features;
+
+ @Before
+ public void setUp() throws Exception {
+ features = new ArrayList<>();
+ }
+
+ @Test
+ public void lengthTest1() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 0;
+ int suffixLength = 2;
+
+ AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(2, features.size());
+ Assert.assertEquals("suf=s", features.get(0));
+ Assert.assertEquals("suf=is", features.get(1));
+
+ }
+
+ @Test
+ public void lengthTest2() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 3;
+ int suffixLength = 5;
+
+ AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(5, features.size());
+ Assert.assertEquals("suf=e", features.get(0));
+ Assert.assertEquals("suf=le", features.get(1));
+ Assert.assertEquals("suf=ple", features.get(2));
+ Assert.assertEquals("suf=mple", features.get(3));
+ Assert.assertEquals("suf=ample", features.get(4));
+
+ }
+
+ @Test
+ public void lengthTest3() {
+
+ String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+ int testTokenIndex = 1;
+ int suffixLength = 5;
+
+ AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);
+
+ generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+ Assert.assertEquals(2, features.size());
+ Assert.assertEquals("suf=s", features.get(0));
+ Assert.assertEquals("suf=is", features.get(1));
+
+ }
+}
[14/24] opennlp git commit: closes apache/opennlp#134 *Already Merged*
Posted by co...@apache.org.
closes apache/opennlp#134 *Already Merged*
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a5bdd609
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a5bdd609
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a5bdd609
Branch: refs/heads/OPENNLP-778
Commit: a5bdd6090aad21dd7e16fc90f321267ee9d6953a
Parents: 61901ab
Author: smarthi <sm...@apache.org>
Authored: Sun Feb 26 12:57:28 2017 -0500
Committer: smarthi <sm...@apache.org>
Committed: Sun Feb 26 12:57:28 2017 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------