Posted to commits@opennlp.apache.org by to...@apache.org on 2018/01/09 14:32:28 UTC

[opennlp] branch master updated: OPENNLP-1180 - LM API switches to String[] (#304)

This is an automated email from the ASF dual-hosted git repository.

tommaso pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new e24f0e7  OPENNLP-1180 - LM API switches to String[] (#304)
e24f0e7 is described below

commit e24f0e78dbeef99bc09a34ee0663e085cb00b8a1
Author: Tommaso Teofili <to...@gmail.com>
AuthorDate: Tue Jan 9 15:32:25 2018 +0100

    OPENNLP-1180 - LM API switches to String[] (#304)
---
 .../languagemodel/NGramLanguageModelTool.java      | 13 +--
 .../opennlp/tools/languagemodel/LanguageModel.java | 24 +++++-
 .../tools/languagemodel/NGramLanguageModel.java    | 52 +++++++++++-
 .../main/java/opennlp/tools/ngram/NGramUtils.java  | 26 +++++-
 .../languagemodel/LanguageModelEvaluationTest.java | 16 ++--
 .../languagemodel/LanguageModelTestUtils.java      | 17 ++--
 .../languagemodel/NgramLanguageModelTest.java      | 99 +++++++++++-----------
 7 files changed, 169 insertions(+), 78 deletions(-)
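
For readers skimming the diff below, here is a minimal caller-side sketch of what the switch to String[] means in practice. The class name, sample tokens, and printed output are illustrative only; the NGramLanguageModel methods shown are the ones added or deprecated in this commit.

    import java.util.Arrays;

    import opennlp.tools.languagemodel.NGramLanguageModel;
    import opennlp.tools.util.StringList;

    public class LmApiSwitchSketch {
      public static void main(String[] args) {
        NGramLanguageModel lm = new NGramLanguageModel(3);

        // Old style: StringList-based calls, now deprecated on the LanguageModel interface.
        lm.add(new StringList("the", "quick", "brown", "fox"), 1, 3);
        double oldStyle = lm.calculateProbability(new StringList("the", "quick"));

        // New style: plain String[] / varargs, introduced by this commit.
        lm.add("the", "quick", "brown", "fox");
        double newStyle = lm.calculateProbability("the", "quick");
        String[] next = lm.predictNextTokens("the", "quick");

        System.out.println(oldStyle + " " + newStyle + " -> " + Arrays.toString(next));
      }
    }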

diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
index 1c599c5..e5d88c0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
@@ -19,6 +19,7 @@ package opennlp.tools.cmdline.languagemodel;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.util.Arrays;
 
 import opennlp.tools.cmdline.BasicCmdLineTool;
 import opennlp.tools.cmdline.CLI;
@@ -28,7 +29,6 @@ import opennlp.tools.cmdline.SystemInputStreamFactory;
 import opennlp.tools.languagemodel.NGramLanguageModel;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.StringList;
 
 /**
  * Command line tool for {@link opennlp.tools.languagemodel.NGramLanguageModel}.
@@ -60,19 +60,20 @@ public class NGramLanguageModelTool extends BasicCmdLineTool {
         String line;
         while ((line = lineStream.read()) != null) {
           double probability;
-          StringList predicted;
+          String[] predicted;
+          // TODO : use a Tokenizer here
           String[] tokens = line.split(" ");
-          StringList sample = new StringList(tokens);
           try {
-            probability = nGramLanguageModel.calculateProbability(sample);
-            predicted = nGramLanguageModel.predictNextTokens(sample);
+            probability = nGramLanguageModel.calculateProbability(tokens);
+            predicted = nGramLanguageModel.predictNextTokens(tokens);
           } catch (Exception e) {
             System.err.println("Error:" + e.getLocalizedMessage());
             System.err.println(line);
             continue;
           }
 
-          System.out.println(sample + " -> prob:" + probability + ", next:" + predicted);
+          System.out.println(Arrays.toString(tokens) + " -> prob:" + probability + ", " +
+              "next:" + Arrays.toString(predicted));
 
           perfMon.incrementCounter();
         }
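
With the hunk above, the tool prints each input line through Arrays.toString for both the tokens and the predicted continuation, so a line of output looks roughly like this (the probability value is made up):

    [I, saw, the, fox] -> prob:0.05, next:[jumped]
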
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
index 98dde4e..8366925 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
@@ -26,19 +26,39 @@ import opennlp.tools.util.StringList;
 public interface LanguageModel {
 
   /**
-   * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary
+   * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary.
    *
    * @param tokens the text tokens to calculate the probability for
    * @return the probability of the given text tokens in the vocabulary
+   * @deprecated use {@link #calculateProbability(String...)}
    */
+  @Deprecated
   double calculateProbability(StringList tokens);
 
   /**
-   * Predict the most probable output sequence of tokens, given an input sequence of tokens
+   * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary.
+   *
+   * @param tokens the text tokens to calculate the probability for
+   * @return the probability of the given text tokens in the vocabulary
+   */
+  double calculateProbability(String... tokens);
+
+  /**
+   * Predict the most probable output sequence of tokens, given an input sequence of tokens.
    *
    * @param tokens a sequence of tokens
    * @return the most probable subsequent token sequence
+   * @deprecated use {@link #predictNextTokens(String...)}
    */
+  @Deprecated
   StringList predictNextTokens(StringList tokens);
 
+  /**
+   * Predict the most probable output sequence of tokens, given an input sequence of tokens.
+   *
+   * @param tokens a sequence of tokens
+   * @return the most probable subsequent token sequence
+   */
+  String[] predictNextTokens(String... tokens);
+
 }
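
The interface now carries both the deprecated StringList overloads and the new varargs ones. As a rough sketch of how an implementor could keep the old entry points alive by delegating to the new ones (this adapter class is hypothetical; NGramLanguageModel below implements each overload directly):

    import opennlp.tools.languagemodel.LanguageModel;
    import opennlp.tools.util.StringList;

    public abstract class StringListBridgingLanguageModel implements LanguageModel {

      @Deprecated
      @Override
      public double calculateProbability(StringList tokens) {
        return calculateProbability(toArray(tokens));
      }

      @Deprecated
      @Override
      public StringList predictNextTokens(StringList tokens) {
        return new StringList(predictNextTokens(toArray(tokens)));
      }

      // Copy a StringList into a plain String[] so the new API can be reused.
      private static String[] toArray(StringList tokens) {
        String[] result = new String[tokens.size()];
        for (int i = 0; i < tokens.size(); i++) {
          result[i] = tokens.getToken(i);
        }
        return result;
      }
    }
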
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
index 501c1bc..e9d25d5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
@@ -52,15 +52,37 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
     this.n = n;
   }
 
+  public void add(String... tokens) {
+    add(new StringList(tokens), 1, n);
+  }
+
   @Override
-  public double calculateProbability(StringList sample) {
+  public double calculateProbability(StringList tokens) {
     double probability = 0d;
     if (size() > 0) {
-      for (StringList ngram : NGramUtils.getNGrams(sample, n)) {
+      for (StringList ngram : NGramUtils.getNGrams(tokens, n)) {
         double score = stupidBackoff(ngram);
         probability += Math.log(score);
         if (Double.isNaN(probability)) {
           probability = 0d;
+          break;
+        }
+      }
+      probability = Math.exp(probability);
+    }
+    return probability;
+  }
+
+  @Override
+  public double calculateProbability(String... tokens) {
+    double probability = 0d;
+    if (size() > 0) {
+      for (String[] ngram : NGramUtils.getNGrams(tokens, n)) {
+        double score = stupidBackoff(new StringList(ngram));
+        probability += Math.log(score);
+        if (Double.isNaN(probability)) {
+          probability = 0d;
+          break;
         }
       }
       probability = Math.exp(probability);
@@ -92,6 +114,32 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
     return token;
   }
 
+  @Override
+  public String[] predictNextTokens(String... tokens) {
+    double maxProb = Double.NEGATIVE_INFINITY;
+    String[] token = null;
+
+    for (StringList ngram : this) {
+      String[] sequence = new String[ngram.size() + tokens.length];
+      for (int i = 0; i < tokens.length; i++) {
+        sequence[i] = tokens[i];
+      }
+      for (int i = 0; i < ngram.size(); i++) {
+        sequence[i + tokens.length] = ngram.getToken(i);
+      }
+      double v = calculateProbability(sequence);
+      if (v > maxProb) {
+        maxProb = v;
+        token = new String[ngram.size()];
+        for (int i = 0; i < ngram.size(); i++) {
+          token[i] = ngram.getToken(i);
+        }
+      }
+    }
+
+    return token;
+  }
+
   private double stupidBackoff(StringList ngram) {
     int count = getCount(ngram);
     StringList nMinusOneToken = NGramUtils.getNMinusOneTokenFirst(ngram);
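
Both calculateProbability variants above accumulate scores in log space and exponentiate at the end, which avoids numeric underflow when many n-gram scores are multiplied together. A tiny standalone sketch of that accumulation pattern, with made-up scores:

    public class LogSpaceProductSketch {
      public static void main(String[] args) {
        double[] ngramScores = {0.5, 0.25, 0.4}; // hypothetical per-ngram backoff scores
        double logProb = 0d;
        for (double score : ngramScores) {
          logProb += Math.log(score);            // multiplying probabilities = adding logs
        }
        double probability = Math.exp(logProb);
        System.out.println(probability);         // 0.5 * 0.25 * 0.4 = 0.05
      }
    }
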
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
index e41291f..dd3e19b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
@@ -229,7 +229,7 @@ public class NGramUtils {
   }
 
   /**
-   * get the ngrams of dimension n of a certain input sequence of tokens
+   * Get the ngrams of dimension n of a certain input sequence of tokens.
    *
    * @param sequence a sequence of tokens
 *   @param size     the size of the resulting ngrams
@@ -249,6 +249,30 @@ public class NGramUtils {
         ngrams.add(new StringList(ngram));
       }
     }
+    return ngrams;
+  }
+
+  /**
+   * Get the ngrams of dimension n of a certain input sequence of tokens.
+   *
+   * @param sequence a sequence of tokens
+   * @param size     the size of the resulting ngrams
+   * @return all the possible ngrams of the given size derivable from the input sequence
+   */
+  public static Collection<String[]> getNGrams(String[] sequence, int size) {
+    Collection<String[]> ngrams = new LinkedList<>();
+    if (size == -1 || size >= sequence.length) {
+      ngrams.add(sequence);
+    } else {
+      for (int i = 0; i < sequence.length - size + 1; i++) {
+        String[] ngram = new String[size];
+        ngram[0] = sequence[i];
+        for (int j = 1; j < size; j++) {
+          ngram[j] = sequence[i + j];
+        }
+        ngrams.add(ngram);
+      }
+    }
 
     return ngrams;
   }
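
A quick illustration of what the new String[] overload returns for a four-token sentence and size 2 (the sentence is made up; the expected bigrams follow from the loop above):

    import java.util.Arrays;
    import java.util.Collection;

    import opennlp.tools.ngram.NGramUtils;

    public class NGramUtilsSketch {
      public static void main(String[] args) {
        String[] sentence = {"I", "saw", "the", "fox"};
        Collection<String[]> bigrams = NGramUtils.getNGrams(sentence, 2);
        for (String[] ngram : bigrams) {
          System.out.println(Arrays.toString(ngram)); // [I, saw], [saw, the], [the, fox]
        }
      }
    }
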
diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
index d4e8e37..eea0eb6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
@@ -32,29 +32,29 @@ public class LanguageModelEvaluationTest {
   @Test
   public void testPerplexityComparison() throws Exception {
 
-    Collection<StringList> trainingVocabulary =
+    Collection<String[]> trainingVocabulary =
         LanguageModelTestUtils.generateRandomVocabulary(1100000);
-    Collection<StringList> testVocabulary =
+    Collection<String[]> testVocabulary =
         LanguageModelTestUtils.generateRandomVocabulary(100);
 
     NGramLanguageModel unigramLM = new NGramLanguageModel(1);
-    for (StringList sentence : trainingVocabulary) {
-      unigramLM.add(sentence, 1, 1);
+    for (String[] sentence : trainingVocabulary) {
+      unigramLM.add(new StringList(sentence), 1, 1);
     }
     double unigramPerplexity =
         LanguageModelTestUtils.getPerplexity(unigramLM, testVocabulary, 1);
 
     NGramLanguageModel bigramLM = new NGramLanguageModel(2);
-    for (StringList sentence : trainingVocabulary) {
-      bigramLM.add(sentence, 1, 2);
+    for (String[] sentence : trainingVocabulary) {
+      bigramLM.add(new StringList(sentence), 1, 2);
     }
     double bigramPerplexity =
         LanguageModelTestUtils.getPerplexity(bigramLM, testVocabulary, 2);
     Assert.assertTrue(unigramPerplexity >= bigramPerplexity);
 
     NGramLanguageModel trigramLM = new NGramLanguageModel(3);
-    for (StringList sentence : trainingVocabulary) {
-      trigramLM.add(sentence, 1, 3);
+    for (String[] sentence : trainingVocabulary) {
+      trigramLM.add(new StringList(sentence), 1, 3);
     }
     double trigramPerplexity =
         LanguageModelTestUtils.getPerplexity(trigramLM, testVocabulary, 3);
diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java
index 81725ae..56edb9e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java
@@ -26,7 +26,6 @@ import java.util.Random;
 import org.junit.Ignore;
 
 import opennlp.tools.ngram.NGramUtils;
-import opennlp.tools.util.StringList;
 
 /**
  * Utility class for language models tests
@@ -39,16 +38,16 @@ public class LanguageModelTestUtils {
 
   private static final char[] chars = new char[]{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
 
-  public static Collection<StringList> generateRandomVocabulary(int size) {
-    Collection<StringList> vocabulary = new LinkedList<>();
+  public static Collection<String[]> generateRandomVocabulary(int size) {
+    Collection<String[]> vocabulary = new LinkedList<>();
     for (int i = 0; i < size; i++) {
-      StringList sentence = generateRandomSentence();
+      String[] sentence = generateRandomSentence();
       vocabulary.add(sentence);
     }
     return vocabulary;
   }
 
-  public static StringList generateRandomSentence() {
+  public static String[] generateRandomSentence() {
     int dimension = r.nextInt(10) + 1;
     String[] sentence = new String[dimension];
     for (int j = 0; j < dimension; j++) {
@@ -56,15 +55,15 @@ public class LanguageModelTestUtils {
       char c = chars[i];
       sentence[j] = c + "-" + c + "-" + c;
     }
-    return new StringList(sentence);
+    return sentence;
   }
 
-  public static double getPerplexity(LanguageModel lm, Collection<StringList> testSet, int ngramSize)
+  public static double getPerplexity(LanguageModel lm, Collection<String[]> testSet, int ngramSize)
       throws ArithmeticException {
     BigDecimal perplexity = new BigDecimal(1d);
 
-    for (StringList sentence : testSet) {
-      for (StringList ngram : NGramUtils.getNGrams(sentence, ngramSize)) {
+    for (String[] sentence : testSet) {
+      for (String[] ngram : NGramUtils.getNGrams(sentence, ngramSize)) {
         double ngramProbability = lm.calculateProbability(ngram);
         perplexity = perplexity.multiply(new BigDecimal(1d).divide(
             new BigDecimal(ngramProbability), CONTEXT));
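
getPerplexity above builds the running product of inverse n-gram probabilities; perplexity in the usual sense is the N-th root (geometric mean) of that product. A small self-contained sketch with made-up probabilities, not the exact tail of the test utility (which lies outside this hunk):

    public class PerplexitySketch {
      public static void main(String[] args) {
        double[] ngramProbabilities = {0.5, 0.25, 0.4}; // hypothetical
        double product = 1d;
        for (double p : ngramProbabilities) {
          product *= 1d / p;
        }
        // perplexity = (product of 1/P(ngram_i)) ^ (1/N)
        double perplexity = Math.pow(product, 1d / ngramProbabilities.length);
        System.out.println(perplexity);
      }
    }
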
diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
index 2ac1f5e..2091d3f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
@@ -26,7 +26,6 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import opennlp.tools.ngram.NGramGenerator;
-import opennlp.tools.util.StringList;
 
 /**
  * Tests for {@link opennlp.tools.languagemodel.NGramLanguageModel}
@@ -34,19 +33,19 @@ import opennlp.tools.util.StringList;
 public class NgramLanguageModelTest {
 
   @Test
-  public void testEmptyVocabularyProbability() throws Exception {
+  public void testEmptyVocabularyProbability() {
     NGramLanguageModel model = new NGramLanguageModel();
     Assert.assertEquals("probability with an empty vocabulary is always 0",
-        0d, model.calculateProbability(new StringList("")), 0d);
+        0d, model.calculateProbability(""), 0d);
     Assert.assertEquals("probability with an empty vocabulary is always 0",
-        0d, model.calculateProbability(new StringList("1", "2", "3")), 0d);
+        0d, model.calculateProbability("1", "2", "3"), 0d);
   }
 
   @Test
-  public void testRandomVocabularyAndSentence() throws Exception {
+  public void testRandomVocabularyAndSentence() {
     NGramLanguageModel model = new NGramLanguageModel();
-    for (StringList sentence : LanguageModelTestUtils.generateRandomVocabulary(10)) {
-      model.add(sentence, 1, 3);
+    for (String[] sentence : LanguageModelTestUtils.generateRandomVocabulary(10)) {
+      model.add(sentence);
     }
     double probability = model.calculateProbability(LanguageModelTestUtils.generateRandomSentence());
     Assert.assertTrue("a probability measure should be between 0 and 1 [was "
@@ -54,82 +53,82 @@ public class NgramLanguageModelTest {
   }
 
   @Test
-  public void testNgramModel() throws Exception {
+  public void testNgramModel() {
     NGramLanguageModel model = new NGramLanguageModel(4);
-    model.add(new StringList("I", "saw", "the", "fox"), 1, 4);
-    model.add(new StringList("the", "red", "house"), 1, 4);
-    model.add(new StringList("I", "saw", "something", "nice"), 1, 2);
-    double probability = model.calculateProbability(new StringList("I", "saw", "the", "red", "house"));
+    model.add("I", "saw", "the", "fox");
+    model.add("the", "red", "house");
+    model.add("I", "saw", "something", "nice");
+    double probability = model.calculateProbability("I", "saw", "the", "red", "house");
     Assert.assertTrue("a probability measure should be between 0 and 1 [was "
         + probability + "]", probability >= 0 && probability <= 1);
 
-    StringList tokens = model.predictNextTokens(new StringList("I", "saw"));
+    String[] tokens = model.predictNextTokens("I", "saw");
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("the", "fox"), tokens);
+    Assert.assertArrayEquals(new String[] {"the", "fox"}, tokens);
   }
 
   @Test
-  public void testBigramProbabilityNoSmoothing() throws Exception {
+  public void testBigramProbability() {
     NGramLanguageModel model = new NGramLanguageModel(2);
-    model.add(new StringList("<s>", "I", "am", "Sam", "</s>"), 1, 2);
-    model.add(new StringList("<s>", "Sam", "I", "am", "</s>"), 1, 2);
-    model.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"), 1, 2);
-    double probability = model.calculateProbability(new StringList("<s>", "I"));
+    model.add("<s>", "I", "am", "Sam", "</s>");
+    model.add("<s>", "Sam", "I", "am", "</s>");
+    model.add("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>");
+    double probability = model.calculateProbability("<s>", "I");
     Assert.assertEquals(0.666d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("Sam", "</s>"));
+    probability = model.calculateProbability("Sam", "</s>");
     Assert.assertEquals(0.5d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("<s>", "Sam"));
+    probability = model.calculateProbability("<s>", "Sam");
     Assert.assertEquals(0.333d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("am", "Sam"));
+    probability = model.calculateProbability("am", "Sam");
     Assert.assertEquals(0.5d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("I", "am"));
+    probability = model.calculateProbability("I", "am");
     Assert.assertEquals(0.666d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("I", "do"));
+    probability = model.calculateProbability("I", "do");
     Assert.assertEquals(0.333d, probability, 0.001);
-    probability = model.calculateProbability(new StringList("I", "am", "Sam"));
+    probability = model.calculateProbability("I", "am", "Sam");
     Assert.assertEquals(0.333d, probability, 0.001);
   }
 
   @Test
-  public void testTrigram() throws Exception {
+  public void testTrigram() {
     NGramLanguageModel model = new NGramLanguageModel(3);
-    model.add(new StringList("I", "see", "the", "fox"), 1, 3);
-    model.add(new StringList("the", "red", "house"), 1, 3);
-    model.add(new StringList("I", "saw", "something", "nice"), 1, 3);
-    double probability = model.calculateProbability(new StringList("I", "saw", "the", "red", "house"));
+    model.add("I", "see", "the", "fox");
+    model.add("the", "red", "house");
+    model.add("I", "saw", "something", "nice");
+    double probability = model.calculateProbability("I", "saw", "the", "red", "house");
     Assert.assertTrue("a probability measure should be between 0 and 1 [was "
         + probability + "]", probability >= 0 && probability <= 1);
 
-    StringList tokens = model.predictNextTokens(new StringList("I", "saw"));
+    String[] tokens = model.predictNextTokens("I", "saw");
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("something"), tokens);
+    Assert.assertArrayEquals(new String[] {"something"}, tokens);
   }
 
   @Test
-  public void testBigram() throws Exception {
+  public void testBigram() {
     NGramLanguageModel model = new NGramLanguageModel(2);
-    model.add(new StringList("I", "see", "the", "fox"), 1, 2);
-    model.add(new StringList("the", "red", "house"), 1, 2);
-    model.add(new StringList("I", "saw", "something", "nice"), 1, 2);
-    double probability = model.calculateProbability(new StringList("I", "saw", "the", "red", "house"));
+    model.add("I", "see", "the", "fox");
+    model.add("the", "red", "house");
+    model.add("I", "saw", "something", "nice");
+    double probability = model.calculateProbability("I", "saw", "the", "red", "house");
     Assert.assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]",
         probability >= 0 && probability <= 1);
 
-    StringList tokens = model.predictNextTokens(new StringList("I", "saw"));
+    String[] tokens = model.predictNextTokens("I", "saw");
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("something"), tokens);
+    Assert.assertArrayEquals(new String[] {"something"}, tokens);
   }
 
   @Test
   public void testSerializedNGramLanguageModel() throws Exception {
     NGramLanguageModel languageModel = new NGramLanguageModel(getClass().getResourceAsStream(
         "/opennlp/tools/ngram/ngram-model.xml"), 3);
-    double probability = languageModel.calculateProbability(new StringList("The", "brown", "fox", "jumped"));
+    double probability = languageModel.calculateProbability("The", "brown", "fox", "jumped");
     Assert.assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]",
         probability >= 0 && probability <= 1);
-    StringList tokens = languageModel.predictNextTokens(new StringList("the","brown","fox"));
+    String[] tokens = languageModel.predictNextTokens("the", "brown", "fox");
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("jumped"), tokens);
+    Assert.assertArrayEquals(new String[] {"jumped"}, tokens);
   }
 
   @Test
@@ -144,18 +143,18 @@ public class NgramLanguageModelTest {
       for (String generatedString : generatedStrings) {
         String[] tokens = generatedString.split(" ");
         if (tokens.length > 0) {
-          languageModel.add(new StringList(tokens), 1, ngramSize);
+          languageModel.add(tokens);
         }
       }
     }
-    StringList tokens = languageModel.predictNextTokens(new StringList("neural",
-        "network", "language"));
+    String[] tokens = languageModel.predictNextTokens("neural",
+        "network", "language");
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("models"), tokens);
-    double p1 = languageModel.calculateProbability(new StringList("neural", "network",
-        "language", "models"));
-    double p2 = languageModel.calculateProbability(new StringList("neural", "network",
-        "language", "model"));
+    Assert.assertArrayEquals(new String[] {"models"}, tokens);
+    double p1 = languageModel.calculateProbability("neural", "network",
+        "language", "models");
+    double p2 = languageModel.calculateProbability("neural", "network",
+        "language", "model");
     Assert.assertTrue(p1 > p2);
   }
 }

-- 
To stop receiving notification emails like this one, please contact
['"commits@opennlp.apache.org" <co...@opennlp.apache.org>'].