You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/06/06 09:56:34 UTC
opennlp git commit: Repair test cases [Forced Update!]
Repository: opennlp
Updated Branches:
refs/heads/LangDetect 4a3a707b8 -> 5f53fe610 (forced update)
Repair test cases
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5f53fe61
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5f53fe61
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5f53fe61
Branch: refs/heads/LangDetect
Commit: 5f53fe610ce5b3c41bfdf415cc02a249e166c64e
Parents: 21a1f84
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Jun 6 11:49:29 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Tue Jun 6 11:56:23 2017 +0200
----------------------------------------------------------------------
.../LanguageDetectorContextGenerator.java | 22 +++++++++++++-------
.../LanguageDetectorContextGeneratorTest.java | 17 +++++----------
.../LanguageDetectorCrossValidatorTest.java | 9 ++++----
3 files changed, 24 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index dcfe0e9..b28c601 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -25,6 +25,12 @@ import opennlp.tools.util.StringList;
import opennlp.tools.util.StringUtil;
import opennlp.tools.util.normalizer.AggregateCharSequenceNormalizer;
import opennlp.tools.util.normalizer.CharSequenceNormalizer;
+import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UnicodeCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UrlCharSequenceNormalizer;
/**
* Context generator for the language detector
@@ -40,12 +46,12 @@ class LanguageDetectorContextGenerator {
this.maxLength = maxLength;
this.normalizer = new AggregateCharSequenceNormalizer(
- // EmojiCharSequenceNormalizer.getInstance(),
- //UrlCharSequenceNormalizer.getInstance(),
- //TwitterCharSequenceNormalizer.getInstance(),
- //NumberCharSequenceNormalizer.getInstance(),
- //UnicodeCharSequenceNormalizer.getInstance(),
- //ShrinkCharSequenceNormalizer.getInstance());
+ EmojiCharSequenceNormalizer.getInstance(),
+ UrlCharSequenceNormalizer.getInstance(),
+ TwitterCharSequenceNormalizer.getInstance(),
+ NumberCharSequenceNormalizer.getInstance(),
+ UnicodeCharSequenceNormalizer.getInstance(),
+ ShrinkCharSequenceNormalizer.getInstance()
);
}
@@ -53,7 +59,7 @@ class LanguageDetectorContextGenerator {
* Initializes the current instance with a minimum n-gram length of 2 and a maximum n-gram length of 3.
*/
LanguageDetectorContextGenerator() {
- this(3, 3);
+ this(2, 3);
}
public String[] getContext(String document) {
@@ -66,7 +72,7 @@ class LanguageDetectorContextGenerator {
for (StringList tokenList : model) {
if (tokenList.size() > 0) {
- context.add("ng=" + StringUtil.toLowerCase(tokenList.getToken(0)));
+ context.add(StringUtil.toLowerCase(tokenList.getToken(0)));
}
}
return context.toArray(new String[context.size()]);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
index 787dc1e..f6c8b18 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
@@ -34,17 +34,10 @@ public class LanguageDetectorContextGeneratorTest {
Collection<String> features = Arrays.asList(cg.getContext(doc));
- Assert.assertEquals(38, features.size());
- Assert.assertTrue(features.contains("ng=ab"));
- Assert.assertTrue(features.contains("ng=abc"));
- Assert.assertTrue(features.contains("ng=abcd"));
- Assert.assertTrue(features.contains("ng=abcde"));
- Assert.assertTrue(features.contains("ng=abcde"));
-
- Assert.assertTrue(features.contains("ng= f"));
- Assert.assertTrue(features.contains("ng= fg"));
- Assert.assertTrue(features.contains("ng= fgh"));
- Assert.assertTrue(features.contains("ng= fghi"));
-
+ Assert.assertEquals(21, features.size());
+ Assert.assertTrue(features.contains("ab"));
+ Assert.assertTrue(features.contains("abc"));
+ Assert.assertTrue(features.contains("e f"));
+ Assert.assertTrue(features.contains(" fg"));
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f53fe61/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
index 8e814e8..520fc71 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
@@ -30,8 +30,9 @@ public class LanguageDetectorCrossValidatorTest {
public void evaluate() throws Exception {
TrainingParameters params = new TrainingParameters();
- params.put(TrainingParameters.ITERATIONS_PARAM, "100");
- params.put(TrainingParameters.CUTOFF_PARAM, "5");
+ params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+ params.put(TrainingParameters.CUTOFF_PARAM, 5);
+ params.put("PrintMessages", false);
final AtomicInteger correctCount = new AtomicInteger();
@@ -56,8 +57,8 @@ public class LanguageDetectorCrossValidatorTest {
cv.evaluate(sampleStream, 2);
- Assert.assertEquals(58, cv.getDocumentCount());
- Assert.assertEquals(0.83, cv.getDocumentAccuracy(), 0.01);
+ Assert.assertEquals(99, cv.getDocumentCount());
+ Assert.assertEquals(0.98989898989899, cv.getDocumentAccuracy(), 0.01);
}
}