You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/06 09:56:34 UTC

opennlp git commit: Repair test cases [Forced Update!]

Repository: opennlp
Updated Branches:
  refs/heads/LangDetect 4a3a707b8 -> 5f53fe610 (forced update)


Repair test cases


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5f53fe61
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5f53fe61
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5f53fe61

Branch: refs/heads/LangDetect
Commit: 5f53fe610ce5b3c41bfdf415cc02a249e166c64e
Parents: 21a1f84
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Jun 6 11:49:29 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Jun 6 11:56:23 2017 +0200

----------------------------------------------------------------------
 .../LanguageDetectorContextGenerator.java       | 22 +++++++++++++-------
 .../LanguageDetectorContextGeneratorTest.java   | 17 +++++----------
 .../LanguageDetectorCrossValidatorTest.java     |  9 ++++----
 3 files changed, 24 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index dcfe0e9..b28c601 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -25,6 +25,12 @@ import opennlp.tools.util.StringList;
 import opennlp.tools.util.StringUtil;
 import opennlp.tools.util.normalizer.AggregateCharSequenceNormalizer;
 import opennlp.tools.util.normalizer.CharSequenceNormalizer;
+import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UnicodeCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UrlCharSequenceNormalizer;
 
 /**
  * Context generator for document categorizer
@@ -40,12 +46,12 @@ class LanguageDetectorContextGenerator {
     this.maxLength = maxLength;
 
     this.normalizer = new AggregateCharSequenceNormalizer(
-        // EmojiCharSequenceNormalizer.getInstance(),
-        //UrlCharSequenceNormalizer.getInstance(),
-        //TwitterCharSequenceNormalizer.getInstance(),
-        //NumberCharSequenceNormalizer.getInstance(),
-        //UnicodeCharSequenceNormalizer.getInstance(),
-        //ShrinkCharSequenceNormalizer.getInstance());
+        EmojiCharSequenceNormalizer.getInstance(),
+        UrlCharSequenceNormalizer.getInstance(),
+        TwitterCharSequenceNormalizer.getInstance(),
+        NumberCharSequenceNormalizer.getInstance(),
+        UnicodeCharSequenceNormalizer.getInstance(),
+        ShrinkCharSequenceNormalizer.getInstance()
     );
   }
 
@@ -53,7 +59,7 @@ class LanguageDetectorContextGenerator {
    * Initializes the current instance with min 2 length and max 5 length of ngrams.
    */
   LanguageDetectorContextGenerator() {
-    this(3, 3);
+    this(2, 3);
   }
 
   public String[] getContext(String document) {
@@ -66,7 +72,7 @@ class LanguageDetectorContextGenerator {
 
     for (StringList tokenList : model) {
       if (tokenList.size() > 0) {
-        context.add("ng=" + StringUtil.toLowerCase(tokenList.getToken(0)));
+        context.add(StringUtil.toLowerCase(tokenList.getToken(0)));
       }
     }
     return context.toArray(new String[context.size()]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
index 787dc1e..f6c8b18 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
@@ -34,17 +34,10 @@ public class LanguageDetectorContextGeneratorTest {
 
     Collection<String> features = Arrays.asList(cg.getContext(doc));
 
-    Assert.assertEquals(38, features.size());
-    Assert.assertTrue(features.contains("ng=ab"));
-    Assert.assertTrue(features.contains("ng=abc"));
-    Assert.assertTrue(features.contains("ng=abcd"));
-    Assert.assertTrue(features.contains("ng=abcde"));
-    Assert.assertTrue(features.contains("ng=abcde"));
-
-    Assert.assertTrue(features.contains("ng= f"));
-    Assert.assertTrue(features.contains("ng= fg"));
-    Assert.assertTrue(features.contains("ng= fgh"));
-    Assert.assertTrue(features.contains("ng= fghi"));
-
+    Assert.assertEquals(21, features.size());
+    Assert.assertTrue(features.contains("ab"));
+    Assert.assertTrue(features.contains("abc"));
+    Assert.assertTrue(features.contains("e f"));
+    Assert.assertTrue(features.contains(" fg"));
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
index 8e814e8..520fc71 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
@@ -30,8 +30,9 @@ public class LanguageDetectorCrossValidatorTest {
   public void evaluate() throws Exception {
 
     TrainingParameters params = new TrainingParameters();
-    params.put(TrainingParameters.ITERATIONS_PARAM, "100");
-    params.put(TrainingParameters.CUTOFF_PARAM, "5");
+    params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+    params.put(TrainingParameters.CUTOFF_PARAM, 5);
+    params.put("PrintMessages", false);
 
 
     final AtomicInteger correctCount = new AtomicInteger();
@@ -56,8 +57,8 @@ public class LanguageDetectorCrossValidatorTest {
 
     cv.evaluate(sampleStream, 2);
 
-    Assert.assertEquals(58, cv.getDocumentCount());
-    Assert.assertEquals(0.83, cv.getDocumentAccuracy(), 0.01);
+    Assert.assertEquals(99, cv.getDocumentCount());
+    Assert.assertEquals(0.98989898989899, cv.getDocumentAccuracy(), 0.01);
   }
 
 }