You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/20 10:41:07 UTC

[20/50] [abbrv] opennlp git commit: OPENNLP-176: Switch language codes to ISO-639-3

OPENNLP-176: Switch language codes to ISO-639-3

This closes #114


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/559747ab
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/559747ab
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/559747ab

Branch: refs/heads/parser_regression
Commit: 559747ab8c2bd8ff76ab3208aa8d61f696c083bf
Parents: 1d8bcb6
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Jan 29 11:06:08 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:20 2017 +0200

----------------------------------------------------------------------
 .../cmdline/namefind/CensusDictionaryCreatorTool.java     |  2 +-
 .../opennlp/tools/cmdline/parser/ParserTrainerTool.java   |  4 ++--
 .../tools/formats/AbstractSampleStreamFactory.java        |  2 +-
 .../tools/formats/Conll03NameSampleStreamFactory.java     |  6 +++---
 .../main/java/opennlp/tools/sentdetect/lang/Factory.java  | 10 +++++-----
 5 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6042510..f9bf5e0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool {
   interface Parameters {
 
     @ParameterDescription(valueName = "code")
-    @OptionalParameter(defaultValue = "en")
+    @OptionalParameter(defaultValue = "eng")
     String getLang();
 
     @ParameterDescription(valueName = "charsetName")

http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 3a8dd5a..2709fd5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT
               params.getHeadRulesSerializerImpl());
     }
     else {
-      if ("en".equals(params.getLang())) {
+      if ("en".equals(params.getLang()) || "eng".equals(params.getLang())) {
         headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
       }
-      else if ("es".equals(params.getLang())) {
+      else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) {
         headRulesSerializer = new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules.HeadRulesSerializer();
       }
       else {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
index 6a7690e..33d0f95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
@@ -34,7 +34,7 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact
   }
 
   public String getLang() {
-    return "en";
+    return "eng";
   }
 
   @SuppressWarnings({"unchecked"})

http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
index 878565f..599d48a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.ObjectStream;
 public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
 
   interface Parameters extends BasicFormatParams {
-    @ParameterDescription(valueName = "en|de")
+    @ParameterDescription(valueName = "eng|deu")
     String getLang();
 
     @ParameterDescription(valueName = "per,loc,org,misc")
@@ -54,11 +54,11 @@ public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<
 
     // TODO: support the other languages with this CoNLL.
     LANGUAGE lang;
-    if ("en".equals(params.getLang())) {
+    if ("eng".equals(params.getLang())) {
       lang = LANGUAGE.EN;
       language = params.getLang();
     }
-    else if ("de".equals(params.getLang())) {
+    else if ("deu".equals(params.getLang())) {
       lang = LANGUAGE.DE;
       language = params.getLang();
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 28b515b..4a34229 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -49,9 +49,9 @@ public class Factory {
 
   public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
 
-    if ("th".equals(languageCode)) {
+    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
       return new SentenceContextGenerator();
-    } else if ("pt".equals(languageCode)) {
+    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
       return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
     }
 
@@ -68,11 +68,11 @@ public class Factory {
   }
 
   public char[] getEOSCharacters(String languageCode) {
-    if ("th".equals(languageCode)) {
+    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
       return thEosCharacters;
-    } else if ("pt".equals(languageCode)) {
+    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
       return ptEosCharacters;
-    } else if ("jp".equals(languageCode)) {
+    } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) {
       return jpEosCharacters;
     }