You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2017/03/13 16:06:14 UTC
[06/24] opennlp git commit: OPENNLP-176: Switch language codes to ISO-639-3
OPENNLP-176: Switch language codes to ISO-639-3
This closes #114
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/33871c3d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/33871c3d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/33871c3d
Branch: refs/heads/OPENNLP-778
Commit: 33871c3db6d870f460636f6205bc23f8d48fdd4d
Parents: bbd6d3f
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Jan 29 11:06:08 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Feb 17 11:01:51 2017 +0100
----------------------------------------------------------------------
.../cmdline/namefind/CensusDictionaryCreatorTool.java | 2 +-
.../opennlp/tools/cmdline/parser/ParserTrainerTool.java | 4 ++--
.../tools/formats/AbstractSampleStreamFactory.java | 2 +-
.../tools/formats/Conll03NameSampleStreamFactory.java | 6 +++---
.../main/java/opennlp/tools/sentdetect/lang/Factory.java | 10 +++++-----
5 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6042510..f9bf5e0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool {
interface Parameters {
@ParameterDescription(valueName = "code")
- @OptionalParameter(defaultValue = "en")
+ @OptionalParameter(defaultValue = "eng")
String getLang();
@ParameterDescription(valueName = "charsetName")
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 3a8dd5a..2709fd5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT
params.getHeadRulesSerializerImpl());
}
else {
- if ("en".equals(params.getLang())) {
+ if ("en".equals(params.getLang()) || "eng".equals(params.getLang())) {
headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
}
- else if ("es".equals(params.getLang())) {
+ else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) {
headRulesSerializer = new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules.HeadRulesSerializer();
}
else {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
index 6a7690e..33d0f95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
@@ -34,7 +34,7 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact
}
public String getLang() {
- return "en";
+ return "eng";
}
@SuppressWarnings({"unchecked"})
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
index 878565f..599d48a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.ObjectStream;
public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
interface Parameters extends BasicFormatParams {
- @ParameterDescription(valueName = "en|de")
+ @ParameterDescription(valueName = "eng|deu")
String getLang();
@ParameterDescription(valueName = "per,loc,org,misc")
@@ -54,11 +54,11 @@ public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<
// TODO: support the other languages with this CoNLL.
LANGUAGE lang;
- if ("en".equals(params.getLang())) {
+ if ("eng".equals(params.getLang())) {
lang = LANGUAGE.EN;
language = params.getLang();
}
- else if ("de".equals(params.getLang())) {
+ else if ("deu".equals(params.getLang())) {
lang = LANGUAGE.DE;
language = params.getLang();
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/33871c3d/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 28b515b..4a34229 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -49,9 +49,9 @@ public class Factory {
public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
- if ("th".equals(languageCode)) {
+ if ("th".equals(languageCode) || "tha".equals(languageCode)) {
return new SentenceContextGenerator();
- } else if ("pt".equals(languageCode)) {
+ } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
}
@@ -68,11 +68,11 @@ public class Factory {
}
public char[] getEOSCharacters(String languageCode) {
- if ("th".equals(languageCode)) {
+ if ("th".equals(languageCode) || "tha".equals(languageCode)) {
return thEosCharacters;
- } else if ("pt".equals(languageCode)) {
+ } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
return ptEosCharacters;
- } else if ("jp".equals(languageCode)) {
+ } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) {
return jpEosCharacters;
}