You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/26 13:34:28 UTC
[34/50] [abbrv] opennlp git commit: Add japanese eos chars
Add japanese eos chars
Thanks to Bar Perach for providing a patch!
See issue OPENNLP-772
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/554626de
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/554626de
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/554626de
Branch: refs/heads/889
Commit: 554626de2edb69951083b847b5fd13db28bae30e
Parents: f89f3ff
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Dec 20 23:35:48 2016 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Dec 20 23:35:48 2016 +0100
----------------------------------------------------------------------
.../java/opennlp/tools/sentdetect/lang/Factory.java | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/554626de/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 1f77ade..d182b26 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -36,14 +36,11 @@ public class Factory {
public static final char[] thEosCharacters = new char[] { ' ','\n' };
+ public static final char[] jpEosCharacters = new char[] {'\u3002', '\uff01', '\uff1f'};
+
public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) {
- if ("th".equals(languageCode)) {
- return new DefaultEndOfSentenceScanner(new char[]{' ','\n'});
- } else if("pt".equals(languageCode)) {
- return new DefaultEndOfSentenceScanner(ptEosCharacters);
- }
- return new DefaultEndOfSentenceScanner(defaultEosCharacters);
+ return new DefaultEndOfSentenceScanner(getEOSCharacters(languageCode));
}
public EndOfSentenceScanner createEndOfSentenceScanner(
@@ -76,8 +73,10 @@ public class Factory {
return thEosCharacters;
} else if ("pt".equals(languageCode)) {
return ptEosCharacters;
+ } else if ("jp".equals(languageCode)) {
+ return jpEosCharacters;
}
return defaultEosCharacters;
}
-}
\ No newline at end of file
+}