You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/20 22:38:35 UTC

opennlp git commit: Add japanese eos chars

Repository: opennlp
Updated Branches:
  refs/heads/trunk f89f3ffe2 -> 554626de2


Add japanese eos chars

Thanks to Bar Perach for providing a patch!

See issue OPENNLP-772


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/554626de
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/554626de
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/554626de

Branch: refs/heads/trunk
Commit: 554626de2edb69951083b847b5fd13db28bae30e
Parents: f89f3ff
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Dec 20 23:35:48 2016 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Dec 20 23:35:48 2016 +0100

----------------------------------------------------------------------
 .../java/opennlp/tools/sentdetect/lang/Factory.java    | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/554626de/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 1f77ade..d182b26 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -36,14 +36,11 @@ public class Factory {
 
   public static final char[] thEosCharacters = new char[] { ' ','\n' };
 
+  public static final char[] jpEosCharacters = new char[] {'\u3002', '\uff01', '\uff1f'};
+
   public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) {
-    if ("th".equals(languageCode)) {
-      return new DefaultEndOfSentenceScanner(new char[]{' ','\n'});
-    } else if("pt".equals(languageCode)) {
-      return new DefaultEndOfSentenceScanner(ptEosCharacters);
-    }
 
-    return new DefaultEndOfSentenceScanner(defaultEosCharacters);
+    return new DefaultEndOfSentenceScanner(getEOSCharacters(languageCode));
   }
 
   public EndOfSentenceScanner createEndOfSentenceScanner(
@@ -76,8 +73,10 @@ public class Factory {
       return thEosCharacters;
     } else if ("pt".equals(languageCode)) {
       return ptEosCharacters;
+    } else if ("jp".equals(languageCode)) {
+      return jpEosCharacters;
     }
 
     return defaultEosCharacters;
   }
-}
\ No newline at end of file
+}