You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/22 14:49:17 UTC

opennlp git commit: OPENNLP-1097: Enable the normalizers by default in langdetect

Repository: opennlp
Updated Branches:
  refs/heads/master b5b6d5c27 -> 5a0f9cafc


OPENNLP-1097: Enable the normalizers by default in langdetect


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5a0f9caf
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5a0f9caf
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5a0f9caf

Branch: refs/heads/master
Commit: 5a0f9cafc024c691de224f676c574c00fd56e8b1
Parents: b5b6d5c
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Jun 22 14:52:10 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Jun 22 16:48:36 2017 +0200

----------------------------------------------------------------------
 .../tools/langdetect/LanguageDetectorContextGenerator.java       | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a0f9caf/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index 1ec42fd..f0941df 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -58,7 +58,7 @@ class LanguageDetectorContextGenerator {
     Collection<String> context = new ArrayList<>();
 
     NGramModel model = new NGramModel();
-    model.add(document, minLength, maxLength);
+    model.add(normalizer.normalize(document), minLength, maxLength);
 
     for (StringList tokenList : model) {
       if (tokenList.size() > 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a0f9caf/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
index 0e0e4dd..a17578b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
@@ -168,14 +168,14 @@ public class NGramModel implements Iterable<StringList> {
    * @param minLength
    * @param maxLength
    */
-  public void add(String chars, int minLength, int maxLength) {
+  public void add(CharSequence chars, int minLength, int maxLength) {
 
     for (int lengthIndex = minLength; lengthIndex < maxLength + 1; lengthIndex++) {
       for (int textIndex = 0;
           textIndex + lengthIndex - 1 < chars.length(); textIndex++) {
 
         String gram = StringUtil.toLowerCase(
-            chars.substring(textIndex, textIndex + lengthIndex));
+            chars.subSequence(textIndex, textIndex + lengthIndex));
 
         add(new StringList(new String[]{gram}));
       }