You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/08 10:24:51 UTC
opennlp git commit: OPENNLP-1065: Use ISO-639-3 in test code
Repository: opennlp
Updated Branches:
refs/heads/master b2a2d2d2d -> aae0f2997
OPENNLP-1065: Use ISO-639-3 in test code
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/aae0f299
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/aae0f299
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/aae0f299
Branch: refs/heads/master
Commit: aae0f29972d6323ad51ef18cdbc84d643a4739d8
Parents: b2a2d2d
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed May 17 11:24:23 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Jun 8 12:15:05 2017 +0200
----------------------------------------------------------------------
.../tools/formats/Conll02NameSampleStream.java | 8 +-
.../formats/Conll02NameSampleStreamFactory.java | 10 +-
.../opennlp/tools/tokenize/lang/Factory.java | 2 +-
.../tools/chunker/ChunkerFactoryTest.java | 2 +-
.../opennlp/tools/chunker/ChunkerMETest.java | 4 +-
.../tools/cmdline/TokenNameFinderToolTest.java | 2 +-
.../opennlp/tools/eval/ArvoresDeitadasEval.java | 11 +-
.../opennlp/tools/eval/Conll00ChunkerEval.java | 2 +-
.../tools/eval/Conll02NameFinderEval.java | 180 +++++++++----------
.../opennlp/tools/eval/ConllXPosTaggerEval.java | 16 +-
.../tools/eval/OntoNotes4NameFinderEval.java | 4 +-
.../tools/eval/OntoNotes4ParserEval.java | 2 +-
.../tools/eval/OntoNotes4PosTaggerEval.java | 2 +-
.../tools/eval/SourceForgeModelEval.java | 6 +-
.../formats/Conll02NameSampleStreamTest.java | 7 +-
.../formats/ad/ADTokenSampleStreamTest.java | 2 +-
.../tools/lemmatizer/LemmatizerMETest.java | 4 +-
.../tools/namefind/NameFinderMETest.java | 14 +-
.../TokenNameFinderCrossValidatorTest.java | 6 +-
.../tools/parser/chunking/ParserTest.java | 2 +-
.../tools/parser/treeinsert/ParserTest.java | 2 +-
.../tools/postag/POSTaggerFactoryTest.java | 2 +-
.../opennlp/tools/postag/POSTaggerMETest.java | 4 +-
.../tools/sentdetect/SDEventStreamTest.java | 4 +-
.../sentdetect/SentenceDetectorFactoryTest.java | 12 +-
.../sentdetect/SentenceDetectorMETest.java | 10 +-
.../tools/tokenize/TokenizerFactoryTest.java | 10 +-
.../opennlp/tools/tokenize/TokenizerMETest.java | 2 +-
.../tools/tokenize/TokenizerTestUtil.java | 4 +-
29 files changed, 166 insertions(+), 170 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
index f3c2a81..8c71f50 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
@@ -50,8 +50,8 @@ import opennlp.tools.util.StringUtil;
public class Conll02NameSampleStream implements ObjectStream<NameSample> {
public enum LANGUAGE {
- NL,
- ES
+ NLD,
+ SPA
}
public static final int GENERATE_PERSON_ENTITIES = 0x01;
@@ -120,7 +120,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> {
String line;
while ((line = lineStream.read()) != null && !StringUtil.isEmpty(line)) {
- if (LANGUAGE.NL.equals(lang) && line.startsWith(DOCSTART)) {
+ if (LANGUAGE.NLD.equals(lang) && line.startsWith(DOCSTART)) {
isClearAdaptiveData = true;
continue;
}
@@ -138,7 +138,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> {
}
// Always clear adaptive data for spanish
- if (LANGUAGE.ES.equals(lang))
+ if (LANGUAGE.SPA.equals(lang))
isClearAdaptiveData = true;
if (sentence.size() > 0) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java
index bfb3170..f19328a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java
@@ -35,7 +35,7 @@ import opennlp.tools.util.ObjectStream;
public class Conll02NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
interface Parameters extends BasicFormatParams {
- @ParameterDescription(valueName = "es|nl")
+ @ParameterDescription(valueName = "spa|nld")
String getLang();
@ParameterDescription(valueName = "per,loc,org,misc")
@@ -56,12 +56,12 @@ public class Conll02NameSampleStreamFactory extends LanguageSampleStreamFactory<
Parameters params = ArgumentParser.parse(args, Parameters.class);
LANGUAGE lang;
- if ("nl".equals(params.getLang())) {
- lang = LANGUAGE.NL;
+ if ("nl".equals(params.getLang()) || "nld".equals(params.getLang())) {
+ lang = LANGUAGE.NLD;
language = params.getLang();
}
- else if ("es".equals(params.getLang())) {
- lang = LANGUAGE.ES;
+ else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) {
+ lang = LANGUAGE.SPA;
language = params.getLang();
}
else {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java
index ef2a9f8..fb15317 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java
@@ -37,7 +37,7 @@ public class Factory {
* @return the alpha numeric pattern for the language or the default pattern.
*/
public Pattern getAlphanumeric(String languageCode) {
- if ("pt".equals(languageCode)) {
+ if ("pt".equals(languageCode) || "por".equals(languageCode)) {
return Pattern.compile("^[0-9a-záãâàéêíóõôúüçA-ZÁÃÂÀÉÊÍÓÕÔÚÜÇ]+$");
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
index c75030a..4205197 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
@@ -47,7 +47,7 @@ public class ChunkerFactoryTest {
private static ChunkerModel trainModel(ModelType type, ChunkerFactory factory)
throws IOException {
- return ChunkerME.train("en", createSampleStream(),
+ return ChunkerME.train("eng", createSampleStream(),
TrainingParameters.defaultParams(), factory);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
index 3c04894..b7654fb 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
@@ -78,7 +78,7 @@ public class ChunkerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- ChunkerModel chunkerModel = ChunkerME.train("en", sampleStream, params, new ChunkerFactory());
+ ChunkerModel chunkerModel = ChunkerME.train("eng", sampleStream, params, new ChunkerFactory());
this.chunker = new ChunkerME(chunkerModel);
}
@@ -143,7 +143,7 @@ public class ChunkerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- ChunkerME.train("en", sampleStream, params, new ChunkerFactory());
+ ChunkerME.train("eng", sampleStream, params, new ChunkerFactory());
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
index a163b0c..e4a7fc6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -110,7 +110,7 @@ public class TokenNameFinderToolTest {
TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) {
- model = NameFinderME.train("en", null, sampleStream, params,
+ model = NameFinderME.train("eng", null, sampleStream, params,
nameFinderFactory);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
index dd54480..6ee3eb0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java
@@ -18,8 +18,8 @@
package opennlp.tools.eval;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import org.junit.Assert;
import org.junit.Test;
@@ -66,14 +66,12 @@ public class ArvoresDeitadasEval {
private static final String BOSQUE = "ad/Bosque_CF_8.0.ad.txt";
private static final String FLORESTA_VIRGEM = "ad/FlorestaVirgem_CF_3.0_ad.txt";
- private static final String ENCODING = "ISO-8859-1";
-
- private static final String LANG = "pt";
+ private static final String LANG = "por";
private static ObjectStream<String> getLineSample(String corpus)
throws IOException {
return new PlainTextByLineStream(new MarkableFileInputStreamFactory(
- new File(EvalUtil.getOpennlpDataDir(), corpus)), ENCODING);
+ new File(EvalUtil.getOpennlpDataDir(), corpus)), StandardCharsets.ISO_8859_1);
}
private static void sentenceCrossEval(TrainingParameters params,
@@ -99,8 +97,7 @@ public class ArvoresDeitadasEval {
getLineSample(FLORESTA_VIRGEM), true);
DictionaryDetokenizer detokenizer = new DictionaryDetokenizer(
- new DetokenizationDictionary(new FileInputStream(new File(
- "lang/pt/tokenizer/pt-detokenizer.xml"))));
+ new DetokenizationDictionary(new File("lang/pt/tokenizer/pt-detokenizer.xml")));
ObjectStream<TokenSample> samples = new NameToTokenSampleStream(
detokenizer, nameSamples);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
index 62d4a46..fd3e054 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java
@@ -52,7 +52,7 @@ public class Conll00ChunkerEval {
new PlainTextByLineStream(
new MarkableFileInputStreamFactory(trainFile), StandardCharsets.UTF_8));
- return ChunkerME.train("en", samples, params, new ChunkerFactory());
+ return ChunkerME.train("eng", samples, params, new ChunkerFactory());
}
private static void eval(ChunkerModel model, File testData,
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
index c064e3f..c233686 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
@@ -94,13 +94,13 @@ public class Conll02NameFinderEval {
public void evalDutchPersonPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6238361266294227d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.744312026002167d);
}
@@ -108,13 +108,13 @@ public class Conll02NameFinderEval {
public void evalDutchPersonMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.5696539485359361d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7127771911298839d);
}
@@ -122,13 +122,13 @@ public class Conll02NameFinderEval {
public void evalDutchPersonMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6363636363636364d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7482403898213319d);
}
@@ -136,13 +136,13 @@ public class Conll02NameFinderEval {
public void evalDutchOrganizationPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6081871345029239d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6502808988764045d);
}
@@ -150,13 +150,13 @@ public class Conll02NameFinderEval {
public void evalDutchOrganizationMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5197969543147207d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5753228120516498d);
}
@@ -164,13 +164,13 @@ public class Conll02NameFinderEval {
public void evalDutchOrganizationMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5412748171368861d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5764966740576497d);
}
@@ -178,13 +178,13 @@ public class Conll02NameFinderEval {
public void evalDutchLocationPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7978609625668449d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7880434782608695d);
}
@@ -192,13 +192,13 @@ public class Conll02NameFinderEval {
public void evalDutchLocationMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.5451977401129944d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.680952380952381d);
}
@@ -206,13 +206,13 @@ public class Conll02NameFinderEval {
public void evalDutchLocationMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6737683089214381d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7433903576982893d);
}
@@ -220,13 +220,13 @@ public class Conll02NameFinderEval {
public void evalDutchMiscPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6651198762567672d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6748166259168704d);
}
@@ -234,13 +234,13 @@ public class Conll02NameFinderEval {
public void evalDutchMiscMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5831157528285466d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5762897914379803d);
}
@@ -248,13 +248,13 @@ public class Conll02NameFinderEval {
public void evalDutchMiscMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.4227642276422764d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL,
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.455294863665187d);
}
@@ -267,12 +267,12 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
combinedType);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.727808326787117d);
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.727808326787117d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.7388253638253639d);
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7388253638253639d);
}
@Test
@@ -284,12 +284,12 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
combinedType);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6673209028459275d);
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6673209028459275d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6984085910208306d);
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.6984085910208306d);
}
@Test
@@ -301,25 +301,25 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
+ TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params,
combinedType);
- eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6999800915787379d);
+ eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6999800915787379d);
- eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.7101430258496261d);
+ eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7101430258496261d);
}
@Test
public void evalSpanishPersonPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8331210191082803d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8419705694177864d);
}
@@ -327,13 +327,13 @@ public class Conll02NameFinderEval {
public void evalSpanishPersonMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.686960933536276d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8132033008252063d);
}
@@ -342,13 +342,13 @@ public class Conll02NameFinderEval {
public void evalSpanishPersonMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7454634624816087d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8215339233038348d);
}
@@ -356,13 +356,13 @@ public class Conll02NameFinderEval {
public void evalSpanishOrganizationPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7478819748758399d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7715330894579315d);
}
@@ -370,13 +370,13 @@ public class Conll02NameFinderEval {
public void evalSpanishOrganizationMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6982288828337874d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7640449438202247d);
}
@@ -384,13 +384,13 @@ public class Conll02NameFinderEval {
public void evalSpanishOrganizationMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.682961897915169d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7776447105788423d);
}
@@ -398,13 +398,13 @@ public class Conll02NameFinderEval {
public void evalSpanishLocationPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7018867924528303d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6315158777711205d);
}
@@ -412,13 +412,13 @@ public class Conll02NameFinderEval {
public void evalSpanishLocationMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7386907929749867d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6772777167947311d);
}
@@ -426,13 +426,13 @@ public class Conll02NameFinderEval {
public void evalSpanishLocationMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7544565842438182d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7005019520356944d);
}
@@ -440,13 +440,13 @@ public class Conll02NameFinderEval {
public void evalSpanishMiscPerceptron() throws IOException {
TrainingParameters params = EvalUtil.createPerceptronParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5102880658436214d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5842696629213483d);
}
@@ -454,13 +454,13 @@ public class Conll02NameFinderEval {
public void evalSpanishMiscMaxentGis() throws IOException {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.40971168437025796d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.45703124999999994d);
}
@@ -468,13 +468,13 @@ public class Conll02NameFinderEval {
public void evalSpanishMiscMaxentQn() throws IOException {
TrainingParameters params = EvalUtil.createMaxentQnParams();
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.470219435736677d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES,
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA,
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5020576131687243d);
}
@@ -487,12 +487,12 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
combinedType);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.7476700838769804d);
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.7476700838769804d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7692307692307693d);
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7692307692307693d);
}
@Test
@@ -504,12 +504,12 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
combinedType);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.707400023454908d);
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.707400023454908d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7576868829337094d);
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7576868829337094d);
}
@Test
@@ -521,11 +521,11 @@ public class Conll02NameFinderEval {
| Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
| Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
- TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
+ TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params,
combinedType);
- eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.7455564833591795d);
+ eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.7455564833591795d);
- eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7856735159817352d);
+ eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7856735159817352d);
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
index 98a0ded..6f6ce3a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
@@ -82,7 +82,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params);
+ "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "dan", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9504442925495558d);
@@ -93,7 +93,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = EvalUtil.createMaxentQnParams();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params);
+ "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "dan", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9564251537935748d);
@@ -104,7 +104,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params);
+ "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nld", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9213965980304387d);
@@ -116,7 +116,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = EvalUtil.createMaxentQnParams();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params);
+ "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nld", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9282005371530886d);
@@ -127,7 +127,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params);
+ "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "por", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9671041418101244d);
@@ -138,7 +138,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = EvalUtil.createMaxentQnParams();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params);
+ "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "por", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9662519175046872d);
@@ -149,7 +149,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params);
+ "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "swe", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9248585572842999d);
@@ -160,7 +160,7 @@ public class ConllXPosTaggerEval {
TrainingParameters params = EvalUtil.createMaxentQnParams();
POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(),
- "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params);
+ "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "swe", params);
eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
"conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9347595473833098d);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
index a001ce9..af217f8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java
@@ -68,7 +68,7 @@ public class OntoNotes4NameFinderEval {
throws IOException {
try (ObjectStream<NameSample> samples = createNameSampleStream()) {
- TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", null,
+ TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", null,
params, new TokenNameFinderFactory());
ObjectStream<NameSample> filteredSamples;
@@ -150,7 +150,7 @@ public class OntoNotes4NameFinderEval {
try (ObjectStream<NameSample> samples = createNameSampleStream()) {
- TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", null,
+ TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", null,
params, featureGen, resources);
ObjectStream<NameSample> filteredSamples;
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
index 5606b82..bf6a508 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java
@@ -63,7 +63,7 @@ public class OntoNotes4ParserEval {
private static void crossEval(TrainingParameters params, HeadRules rules, double expectedScore)
throws IOException {
try (ObjectStream<Parse> samples = createParseSampleStream()) {
- ParserCrossValidator cv = new ParserCrossValidator("en", params, rules, ParserType.CHUNKING);
+ ParserCrossValidator cv = new ParserCrossValidator("eng", params, rules, ParserType.CHUNKING);
cv.evaluate(samples, 5);
Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
index 3ea7abe..b3939e0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
@@ -60,7 +60,7 @@ public class OntoNotes4PosTaggerEval {
private static void crossEval(TrainingParameters params, double expectedScore)
throws IOException {
try (ObjectStream<POSSample> samples = createPOSSampleStream()) {
- POSTaggerCrossValidator cv = new POSTaggerCrossValidator("en", params, new POSTaggerFactory());
+ POSTaggerCrossValidator cv = new POSTaggerCrossValidator("eng", params, new POSTaggerFactory());
cv.evaluate(samples, 5);
Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java
index 24cdcd0..89f4c5e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java
@@ -111,7 +111,7 @@ public class SourceForgeModelEval {
StringBuilder text = new StringBuilder();
- try (ObjectStream<DocumentSample> lineBatches = new LeipzigDoccatSampleStream("en", 25,
+ try (ObjectStream<DocumentSample> lineBatches = new LeipzigDoccatSampleStream("eng", 25,
new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(),
"leipzig/eng_news_2010_300K-sentences.txt")))) {
@@ -145,7 +145,7 @@ public class SourceForgeModelEval {
Tokenizer tokenizer = new TokenizerME(model);
- try (ObjectStream<DocumentSample> lines = new LeipzigDoccatSampleStream("en", 1,
+ try (ObjectStream<DocumentSample> lines = new LeipzigDoccatSampleStream("eng", 1,
WhitespaceTokenizer.INSTANCE,
new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(),
"leipzig/eng_news_2010_300K-sentences.txt")))) {
@@ -164,7 +164,7 @@ public class SourceForgeModelEval {
}
private ObjectStream<DocumentSample> createLineWiseStream() throws IOException {
- return new LeipzigDoccatSampleStream("en", 1,
+ return new LeipzigDoccatSampleStream("eng", 1,
new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(),
"leipzig/eng_news_2010_300K-sentences.txt")));
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
index 7b4c374..bc0d4fd 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java
@@ -29,7 +29,6 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
/**
- *
* Note:
* Sample training data must be UTF-8 encoded and uncompressed!
*/
@@ -45,7 +44,7 @@ public class Conll02NameSampleStreamTest {
@Test
public void testParsingSpanishSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.ES, "conll2002-es.sample");
+ ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample");
NameSample personName = sampleStream.read();
@@ -67,7 +66,7 @@ public class Conll02NameSampleStreamTest {
@Test
public void testParsingDutchSample() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NL, "conll2002-nl.sample");
+ ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
NameSample personName = sampleStream.read();
@@ -83,7 +82,7 @@ public class Conll02NameSampleStreamTest {
@Test
public void testReset() throws IOException {
- ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NL, "conll2002-nl.sample");
+ ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
NameSample sample = sampleStream.read();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
index 6dd591e..23dda9b 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java
@@ -54,7 +54,7 @@ public class ADTokenSampleStreamTest {
File data = new File(getClass().getClassLoader()
.getResource("opennlp/tools/formats/ad.sample").toURI());
String[] args = { "-data", data.getCanonicalPath(), "-encoding", "UTF-8",
- "-lang", "pt", "-detokenizer", dict.getCanonicalPath() };
+ "-lang", "por", "-detokenizer", dict.getCanonicalPath() };
ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
TokenSample sample = tokenSampleStream.read();
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
index 285af4a..f925cb3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
@@ -72,7 +72,7 @@ public class LemmatizerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
params.put(TrainingParameters.CUTOFF_PARAM, 5);
- LemmatizerModel lemmatizerModel = LemmatizerME.train("en", sampleStream,
+ LemmatizerModel lemmatizerModel = LemmatizerME.train("eng", sampleStream,
params, new LemmatizerFactory());
this.lemmatizer = new LemmatizerME(lemmatizerModel);
@@ -98,7 +98,7 @@ public class LemmatizerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
params.put(TrainingParameters.CUTOFF_PARAM, 5);
- LemmatizerME.train("en", sampleStream, params, new LemmatizerFactory());
+ LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index 94fbb36..740e7d1 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -68,7 +68,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
@@ -126,7 +126,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -170,7 +170,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -200,7 +200,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE_OVERRIDE, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -235,7 +235,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -271,7 +271,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
@@ -322,7 +322,7 @@ public class NameFinderMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 70);
params.put(TrainingParameters.CUTOFF_PARAM, 1);
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+ TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
index 0326fb2..1f4b5a6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
@@ -57,7 +57,7 @@ public class TokenNameFinderCrossValidatorTest {
mlParams.put(TrainingParameters.ALGORITHM_PARAM,
ModelType.MAXENT.toString());
- TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en",
+ TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null);
cv.evaluate(sampleStream, 2);
@@ -88,7 +88,7 @@ public class TokenNameFinderCrossValidatorTest {
NameEvaluationErrorListener listener = new NameEvaluationErrorListener(out);
Map<String, Object> resources = Collections.emptyMap();
- TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en",
+ TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
TYPE, mlParams, null, resources, listener);
cv.evaluate(sampleStream, 2);
@@ -113,7 +113,7 @@ public class TokenNameFinderCrossValidatorTest {
mlParams.put(TrainingParameters.ALGORITHM_PARAM,
ModelType.MAXENT.toString());
- TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en",
+ TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null);
cv.evaluate(sampleStream, 2);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
index 9d72e9c..277a4e5 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
@@ -45,7 +45,7 @@ public class ParserTest {
ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
HeadRules headRules = ParserTestUtil.createTestHeadRules();
- ParserModel model = Parser.train("en", parseSamples, headRules,
+ ParserModel model = Parser.train("eng", parseSamples, headRules,
TrainingParameters.defaultParams());
opennlp.tools.parser.Parser parser = ParserFactory.create(model);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
index 92f569f..a8c0015 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
@@ -44,7 +44,7 @@ public class ParserTest {
ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
HeadRules headRules = ParserTestUtil.createTestHeadRules();
- ParserModel model = Parser.train("en", parseSamples, headRules, 100, 0);
+ ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0);
opennlp.tools.parser.Parser parser = ParserFactory.create(model);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
index b98d3bf..6f3443b 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
@@ -52,7 +52,7 @@ public class POSTaggerFactoryTest {
private static POSModel trainPOSModel(POSTaggerFactory factory)
throws IOException {
- return POSTaggerME.train("en", createSampleStream(),
+ return POSTaggerME.train("eng", createSampleStream(),
TrainingParameters.defaultParams(), factory);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
index 838150e..065fe15 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
@@ -54,7 +54,7 @@ public class POSTaggerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
params.put(TrainingParameters.CUTOFF_PARAM, 5);
- return POSTaggerME.train("en", createSampleStream(), params,
+ return POSTaggerME.train("eng", createSampleStream(), params,
new POSTaggerFactory());
}
@@ -101,7 +101,7 @@ public class POSTaggerMETest {
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
params.put(TrainingParameters.CUTOFF_PARAM, 5);
- POSTaggerME.train("en", stream, params, new POSTaggerFactory());
+ POSTaggerME.train("eng", stream, params, new POSTaggerFactory());
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
index f71dd45..138e915 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java
@@ -45,8 +45,8 @@ public class SDEventStreamTest {
Factory factory = new Factory();
ObjectStream<Event> eventStream = new SDEventStream(sampleStream,
- factory.createSentenceContextGenerator("en"),
- factory.createEndOfSentenceScanner("en"));
+ factory.createSentenceContextGenerator("eng"),
+ factory.createEndOfSentenceScanner("eng"));
Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
index 1306e8b..06de899 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
@@ -55,7 +55,7 @@ public class SentenceDetectorFactoryTest {
private static SentenceModel train(SentenceDetectorFactory factory)
throws IOException {
- return SentenceDetectorME.train("en", createSampleStream(), factory,
+ return SentenceDetectorME.train("eng", createSampleStream(), factory,
TrainingParameters.defaultParams());
}
@@ -72,7 +72,7 @@ public class SentenceDetectorFactoryTest {
Dictionary dic = loadAbbDictionary();
char[] eos = {'.', '?'};
- SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, dic,
+ SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic,
eos));
SentenceDetectorFactory factory = sdModel.getFactory();
@@ -97,7 +97,7 @@ public class SentenceDetectorFactoryTest {
Dictionary dic = null;
char[] eos = {'.', '?'};
- SentenceModel sdModel = train(new SentenceDetectorFactory("en", true,
+ SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true,
dic, eos));
SentenceDetectorFactory factory = sdModel.getFactory();
@@ -124,7 +124,7 @@ public class SentenceDetectorFactoryTest {
Dictionary dic = null;
char[] eos = null;
- SentenceModel sdModel = train(new SentenceDetectorFactory("en", true,
+ SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true,
dic, eos));
SentenceDetectorFactory factory = sdModel.getFactory();
@@ -154,7 +154,7 @@ public class SentenceDetectorFactoryTest {
Dictionary dic = loadAbbDictionary();
char[] eos = {'.', '?'};
- SentenceModel sdModel = train(new DummySentenceDetectorFactory("en", true,
+ SentenceModel sdModel = train(new DummySentenceDetectorFactory("eng", true,
dic, eos));
SentenceDetectorFactory factory = sdModel.getFactory();
@@ -185,7 +185,7 @@ public class SentenceDetectorFactoryTest {
char[] eos = {'.', '?'};
SentenceDetectorFactory factory = SentenceDetectorFactory.create(
- DummySentenceDetectorFactory.class.getCanonicalName(), "es", false,
+ DummySentenceDetectorFactory.class.getCanonicalName(), "spa", false,
dic, eos);
Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
index 5fba0fd..87ced1b 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
@@ -46,13 +46,13 @@ public class SentenceDetectorMETest {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
- SentenceDetectorFactory factory = new SentenceDetectorFactory("en", true, null, null);
+ SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);
SentenceModel sentdetectModel = SentenceDetectorME.train(
- "en", new SentenceSampleStream(new PlainTextByLineStream(in,
+ "eng", new SentenceSampleStream(new PlainTextByLineStream(in,
StandardCharsets.UTF_8)), factory, mlParams);
- Assert.assertEquals("en", sentdetectModel.getLanguage());
+ Assert.assertEquals("eng", sentdetectModel.getLanguage());
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
@@ -146,9 +146,9 @@ public class SentenceDetectorMETest {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
- SentenceDetectorFactory factory = new SentenceDetectorFactory("en", true, null, null);
+ SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);
- SentenceDetectorME.train("en",
+ SentenceDetectorME.train("eng",
new SentenceSampleStream(
new PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
index b344596..a916a32 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
@@ -66,7 +66,7 @@ public class TokenizerFactoryTest {
public void testDefault() throws IOException {
Dictionary dic = loadAbbDictionary();
- final String lang = "es";
+ final String lang = "spa";
TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));
@@ -99,7 +99,7 @@ public class TokenizerFactoryTest {
public void testNullDict() throws IOException {
Dictionary dic = null;
- final String lang = "es";
+ final String lang = "spa";
TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));
@@ -132,7 +132,7 @@ public class TokenizerFactoryTest {
public void testCustomPatternAndAlphaOpt() throws IOException {
Dictionary dic = null;
- final String lang = "es";
+ final String lang = "spa";
String pattern = "^[0-9A-Za-z]+$";
TokenizerModel model = train(new TokenizerFactory(lang, dic, true,
@@ -166,7 +166,7 @@ public class TokenizerFactoryTest {
public void testDummyFactory() throws IOException {
Dictionary dic = loadAbbDictionary();
- final String lang = "es";
+ final String lang = "spa";
String pattern = "^[0-9A-Za-z]+$";
TokenizerModel model = train(new DummyTokenizerFactory(lang, dic, true,
@@ -198,7 +198,7 @@ public class TokenizerFactoryTest {
@Test
public void testCreateDummyFactory() throws IOException {
Dictionary dic = loadAbbDictionary();
- final String lang = "es";
+ final String lang = "spa";
String pattern = "^[0-9A-Za-z]+$";
TokenizerFactory factory = TokenizerFactory.create(
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
index 3dd92a0..a634b07 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
@@ -87,7 +87,7 @@ public class TokenizerMETest {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
mlParams.put(TrainingParameters.CUTOFF_PARAM, 5);
- TokenizerME.train(samples, TokenizerFactory.create(null, "en", null, true, null), mlParams);
+ TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams);
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
index 4d49c58..1d43f22 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
@@ -58,7 +58,7 @@ public class TokenizerTestUtil {
mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
return TokenizerME.train(new CollectionObjectStream<>(samples),
- TokenizerFactory.create(null, "en", null, true, null), mlParams);
+ TokenizerFactory.create(null, "eng", null, true, null), mlParams);
}
static TokenizerModel createMaxentTokenModel() throws IOException {
@@ -73,7 +73,7 @@ public class TokenizerTestUtil {
mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);
- return TokenizerME.train(samples, TokenizerFactory.create(null, "en", null, true, null), mlParams);
+ return TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams);
}
}