Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/16 17:26:32 UTC

[01/50] [abbrv] opennlp git commit: Rolling back Release 1.7.2 RC

Repository: opennlp
Updated Branches:
  refs/heads/parser_regression [created] 3ac2fb377


Rolling back Release 1.7.2 RC


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a81f37b3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a81f37b3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a81f37b3

Branch: refs/heads/parser_regression
Commit: a81f37b3c89c37b092f0a83d1c5cf5959bafd10c
Parents: c91d353
Author: smarthi <sm...@apache.org>
Authored: Wed Feb 1 09:15:41 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:50 2017 +0200

----------------------------------------------------------------------
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml            | 2 +-
 opennlp-docs/pom.xml             | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml            | 2 +-
 opennlp-uima/pom.xml             | 2 +-
 pom.xml                          | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 1633deb..4bf95cf 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.7.2-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index c0a57c3..613bd80 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.7.2-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-docs/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index fd2b0d1..7b916c0 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   <parent>
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.3-SNAPSHOT</version>
+	<version>1.7.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-morfologik-addon/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 1c384c7..d62a70a 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.7.2-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 22fc017..9441ebb 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.7.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/opennlp-uima/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 070fec9..1e99c3d 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@
 	<parent>
 	    <groupId>org.apache.opennlp</groupId>
 	    <artifactId>opennlp</artifactId>
-	    <version>1.7.3-SNAPSHOT</version>
+	    <version>1.7.2-SNAPSHOT</version>
 	    <relativePath>../pom.xml</relativePath>
     </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a81f37b3/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 98acfb1..7081f25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.3-SNAPSHOT</version>
+	<version>1.7.2-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<name>Apache OpenNLP Reactor</name>


[46/50] [abbrv] opennlp git commit: OPENNLP-1014: Add more tests for featuregen

Posted by jo...@apache.org.
OPENNLP-1014: Add more tests for featuregen

This closes #151


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/81b07ecd
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/81b07ecd
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/81b07ecd

Branch: refs/heads/parser_regression
Commit: 81b07ecd1ff22c1e715b5f55d8a1a61f227cffb5
Parents: 7589af6
Author: koji <ko...@rondhuit.com>
Authored: Fri Apr 7 10:49:50 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../util/featuregen/FeatureGeneratorUtil.java   |  10 +-
 .../BigramNameFeatureGeneratorTest.java         |  94 ++++++++++++++
 .../CharacterNgramFeatureGeneratorTest.java     |  61 +++++++++
 .../featuregen/FeatureGeneratorUtilTest.java    |  45 +++++++
 .../util/featuregen/InSpanGeneratorTest.java    |  73 +++++++++++
 .../PosTaggerFeatureGeneratorTest.java          |  76 ++++++++++++
 .../PreviousTwoMapFeatureGeneratorTest.java     |  63 ++++++++++
 .../SentenceFeatureGeneratorTest.java           | 123 +++++++++++++++++++
 .../TokenClassFeatureGeneratorTest.java         |  63 ++++++++++
 .../featuregen/TokenFeatureGeneratorTest.java   |  62 ++++++++++
 .../TokenPatternFeatureGeneratorTest.java       |  74 +++++++++++
 .../TrigramNameFeatureGeneratorTest.java        | 108 ++++++++++++++++
 12 files changed, 848 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
index dfcf10d..79c2a50 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
@@ -85,11 +85,13 @@ public class FeatureGeneratorUtil {
         feat = "num";
       }
     }
-    else if (pattern.isAllCapitalLetter() && token.length() == 1) {
-      feat = "sc";
-    }
     else if (pattern.isAllCapitalLetter()) {
-      feat = "ac";
+      if (token.length() == 1) {
+        feat = "sc";
+      }
+      else {
+        feat = "ac";
+      }
     }
     else if (capPeriod.matcher(token).find()) {
       feat = "cp";

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
new file mode 100644
index 0000000..0e31059
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class BigramNameFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testBegin() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw=This,is", features.get(0));
+    Assert.assertEquals("wc,nc=ic,lc", features.get(1));
+  }
+
+  @Test
+  public void testMiddle() {
+
+    final int testTokenIndex = 2;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(4, features.size());
+    Assert.assertEquals("pw,w=is,an", features.get(0));
+    Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
+    Assert.assertEquals("w,nw=an,example", features.get(2));
+    Assert.assertEquals("wc,nc=lc,lc", features.get(3));
+  }
+
+  @Test
+  public void testEnd() {
+
+    final int testTokenIndex = 4;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pw,w=example,sentence", features.get(0));
+    Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
+  }
+
+  @Test
+  public void testShort() {
+
+    String[] shortSentence = new String[] {"word"};
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, shortSentence, testTokenIndex, null);
+
+    Assert.assertEquals(0, features.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
new file mode 100644
index 0000000..a695e06
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class CharacterNgramFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testDefault() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new CharacterNgramFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    assertContainsNg(features,
+            "ex", "exa", "exam", "examp",
+            "xa", "xam", "xamp", "xampl",
+            "am", "amp", "ampl", "ample",
+            "mp", "mpl", "mple",
+            "pl", "ple",
+            "le");
+  }
+
+  private static void assertContainsNg(List<String> features, String... elements) {
+    Assert.assertEquals(elements.length, features.size());
+    for (String e: elements) {
+      Assert.assertTrue(features.contains("ng=" + e));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
new file mode 100644
index 0000000..cca0d83
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class FeatureGeneratorUtilTest {
+
+  @Test
+  public void test() {
+    // digits
+    Assert.assertEquals("2d", FeatureGeneratorUtil.tokenFeature("12"));
+    Assert.assertEquals("4d", FeatureGeneratorUtil.tokenFeature("1234"));
+    Assert.assertEquals("an", FeatureGeneratorUtil.tokenFeature("abcd234"));
+    Assert.assertEquals("dd", FeatureGeneratorUtil.tokenFeature("1234-56"));
+    Assert.assertEquals("ds", FeatureGeneratorUtil.tokenFeature("4/6/2017"));
+    Assert.assertEquals("dc", FeatureGeneratorUtil.tokenFeature("1,234,567"));
+    Assert.assertEquals("dp", FeatureGeneratorUtil.tokenFeature("12.34567"));
+    Assert.assertEquals("num", FeatureGeneratorUtil.tokenFeature("123(456)7890"));
+
+    // letters
+    Assert.assertEquals("lc", FeatureGeneratorUtil.tokenFeature("opennlp"));
+    Assert.assertEquals("sc", FeatureGeneratorUtil.tokenFeature("O"));
+    Assert.assertEquals("ac", FeatureGeneratorUtil.tokenFeature("OPENNLP"));
+    Assert.assertEquals("cp", FeatureGeneratorUtil.tokenFeature("A."));
+    Assert.assertEquals("ic", FeatureGeneratorUtil.tokenFeature("Mike"));
+    Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("somethingStupid"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
new file mode 100644
index 0000000..a5fd3d4
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.util.Span;
+
+public class InSpanGeneratorTest {
+
+  static class SimpleSpecificPersonFinder implements TokenNameFinder {
+
+    private final String theName;
+
+    public SimpleSpecificPersonFinder(String theName) {
+      this.theName = theName;
+    }
+
+    @Override
+    public Span[] find(String[] tokens) {
+      for (int i = 0; i < tokens.length; i++) {
+        if (theName.equals(tokens[i])) {
+          return new Span[]{ new Span(i, i + 1, "person") };
+        }
+      }
+
+      return new Span[]{};
+    }
+
+    @Override
+    public void clearAdaptiveData() {
+    }
+  }
+
+  @Test
+  public void test() {
+
+    List<String> features = new ArrayList<>();
+
+    String[] testSentence = new String[]{ "Every", "John", "has", "its", "day", "." };
+
+    AdaptiveFeatureGenerator generator = new InSpanGenerator("john", new SimpleSpecificPersonFinder("John"));
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    features.clear();
+    generator.createFeatures(features, testSentence, 1, null);
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("john:w=dic", features.get(0));
+    Assert.assertEquals("john:w=dic=John", features.get(1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
new file mode 100644
index 0000000..b916fc9
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PosTaggerFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+  static String[] testTags = new String[] {"DT", "VBZ", "DT", "NN", "NN"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testBegin() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(0, features.size());
+  }
+
+  @Test
+  public void testNext() {
+
+    final int testTokenIndex = 1;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("t=DT", features.get(0));
+  }
+
+  @Test
+  public void testMiddle() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("t=DT", features.get(0));
+    Assert.assertEquals("t2=VBZ,DT", features.get(1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
new file mode 100644
index 0000000..2b66b50
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PreviousTwoMapFeatureGeneratorTest {
+
+  @Test
+  public void testFeatureGeneration() {
+
+    AdaptiveFeatureGenerator fg = new PreviousTwoMapFeatureGenerator();
+
+    String[] sentence = new String[] {"a", "b", "c"};
+
+    List<String> features = new ArrayList<>();
+
+    // this should generate no features
+    fg.createFeatures(features, sentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    // this should generate the pd=null feature
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=null,null", features.get(0));
+
+    features.clear();
+
+    // this should generate the pd=1 feature
+    fg.updateAdaptiveData(sentence, new String[] {"1", "2", "3"});
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=2,1", features.get(0));
+
+    features.clear();
+
+    // this should generate the pd=null feature again after
+    // the adaptive data was cleared
+    fg.clearAdaptiveData();
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=null,null", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
new file mode 100644
index 0000000..a4cee75
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SentenceFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+  static String[] testShort = new String[] {"word"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testTT() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, true);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+    Assert.assertEquals("S=end", features.get(1));
+  }
+
+  @Test
+  public void testTF() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, false);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(0, features.size());
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+  }
+
+  @Test
+  public void testFT() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, true);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+  }
+
+  @Test
+  public void testFF() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, false);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(0, features.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
new file mode 100644
index 0000000..b165f90
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TokenClassFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "Example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testGenWAC() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(true);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("wc=ic", features.get(0));
+    Assert.assertEquals("w&c=example,ic", features.get(1));
+  }
+
+  @Test
+  public void testNoWAC() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(false);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("wc=ic", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
new file mode 100644
index 0000000..4eae767
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TokenFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void test() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(false);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("w=This", features.get(0));
+  }
+
+  @Test
+  public void testLowerCase() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(true);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("w=this", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
new file mode 100644
index 0000000..1d905db
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TokenPatternFeatureGeneratorTest {
+
+  private List<String> features;
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testSingleToken() {
+
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("st=example", features.get(0));
+  }
+
+  @Test
+  public void testSentence() {
+
+    String[] testSentence = new String[] {"This is an example sentence"};
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    Assert.assertEquals(14, features.size());
+    Assert.assertEquals("stn=5", features.get(0));
+    Assert.assertEquals("pt2=iclc", features.get(1));
+    Assert.assertEquals("pt3=iclclc", features.get(2));
+    Assert.assertEquals("st=this", features.get(3));
+    Assert.assertEquals("pt2=lclc", features.get(4));
+    Assert.assertEquals("pt3=lclclc", features.get(5));
+    Assert.assertEquals("st=is", features.get(6));
+    Assert.assertEquals("pt2=lclc", features.get(7));
+    Assert.assertEquals("pt3=lclclc", features.get(8));
+    Assert.assertEquals("st=an", features.get(9));
+    Assert.assertEquals("pt2=lclc", features.get(10));
+    Assert.assertEquals("st=example", features.get(11));
+    Assert.assertEquals("st=sentence", features.get(12));
+    Assert.assertEquals("pta=iclclclclc", features.get(13));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/81b07ecd/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
new file mode 100644
index 0000000..1e0cb18
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TrigramNameFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testBegin() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw,nnw=This,is,an", features.get(0));
+    Assert.assertEquals("wc,nwc,nnwc=ic,lc,lc", features.get(1));
+  }
+
+  @Test
+  public void testNextOfBegin() {
+
+    final int testTokenIndex = 1;
+
+    AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw,nnw=is,an,example", features.get(0));
+    Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(1));
+  }
+
+  @Test
+  public void testMiddle() {
+
+    final int testTokenIndex = 2;
+
+    AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(4, features.size());
+    Assert.assertEquals("ppw,pw,w=This,is,an", features.get(0));
+    Assert.assertEquals("ppwc,pwc,wc=ic,lc,lc", features.get(1));
+    Assert.assertEquals("w,nw,nnw=an,example,sentence", features.get(2));
+    Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(3));
+  }
+
+  @Test
+  public void testEnd() {
+
+    final int testTokenIndex = 4;
+
+    AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("ppw,pw,w=an,example,sentence", features.get(0));
+    Assert.assertEquals("ppwc,pwc,wc=lc,lc,lc", features.get(1));
+  }
+
+  @Test
+  public void testShort() {
+
+    String[] shortSentence = new String[] {"I", "know", "it"};
+
+    final int testTokenIndex = 1;
+
+    AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
+
+    generator.createFeatures(features, shortSentence, testTokenIndex, null);
+
+    Assert.assertEquals(0, features.size());
+  }
+}


[27/50] [abbrv] opennlp git commit: NoJira: Run jacoco during build and not afterwards

Posted by jo...@apache.org.
NoJira: Run jacoco during build and not afterwards


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/96107813
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/96107813
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/96107813

Branch: refs/heads/parser_regression
Commit: 9610781359e2ffb9a5f09b0c94f3a56a24ca78bc
Parents: 40cdacb
Author: Jörn Kottmann <jo...@apache.org>
Authored: Mon Feb 20 14:22:56 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 .travis.yml | 4 ++--
 pom.xml     | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 49d902e..b3399b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ before_install:
   - export M2_HOME=$PWD/apache-maven-3.3.9
   - export PATH=$M2_HOME/bin:$PATH
 
-script: mvn clean install
+script: mvn clean install -Pjacoco
 
 after_success:
-  - mvn clean test -Pjacoco jacoco:report coveralls:report
+  - mvn jacoco:report coveralls:report

http://git-wip-us.apache.org/repos/asf/opennlp/blob/96107813/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 8e37452..45d3c37 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,9 +214,6 @@
 					<groupId>org.eluder.coveralls</groupId>
 					<artifactId>coveralls-maven-plugin</artifactId>
 					<version>${coveralls.maven.plugin}</version>
-					<configuration>
-						<repoToken>BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk</repoToken>
-					</configuration>
 				</plugin>
 
 				<plugin>
@@ -397,6 +394,9 @@
 
 		<profile>
 			<id>jacoco</id>
+			<properties>
+				<opennlp.forkCount>1</opennlp.forkCount>
+			</properties>
 			<build>
 				<plugins>
 					<plugin>


[34/50] [abbrv] opennlp git commit: OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas

Posted by jo...@apache.org.
OPENNLP-904 Harmonize lemmatizer API and function to get multiple lemmas

OPENNLP-904 add minor correction after PR comment


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d3c16d53
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d3c16d53
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d3c16d53

Branch: refs/heads/parser_regression
Commit: d3c16d53633595619963114e9499c92fe1d7ee2a
Parents: b78abfb
Author: Rodrigo Agerri <ra...@apache.org>
Authored: Fri Feb 3 16:00:38 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 .../cmdline/lemmatizer/LemmatizerMETool.java    |  4 +-
 .../tools/lemmatizer/DictionaryLemmatizer.java  | 70 ++++++++++++++------
 .../lemmatizer/LemmaSampleEventStream.java      |  2 +-
 .../tools/lemmatizer/LemmaSampleStream.java     |  4 +-
 .../opennlp/tools/lemmatizer/Lemmatizer.java    | 16 ++++-
 .../opennlp/tools/lemmatizer/LemmatizerME.java  | 64 ++++++++++++++++--
 .../tools/lemmatizer/DummyLemmatizer.java       |  7 ++
 .../tools/lemmatizer/LemmatizerMETest.java      |  3 +-
 8 files changed, 136 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index e4e47b5..90ba95d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -72,10 +72,8 @@ public class LemmatizerMETool extends BasicCmdLineTool {
             continue;
           }
 
-          String[] preds = lemmatizer.lemmatize(posSample.getSentence(),
+          String[] lemmas = lemmatizer.lemmatize(posSample.getSentence(),
               posSample.getTags());
-          String[] lemmas = lemmatizer.decodeLemmas(posSample.getSentence(),
-              preds);
 
           System.out.println(new LemmaSample(posSample.getSentence(),
               posSample.getTags(), lemmas).toString());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index b1b04a1..9f0b0b0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -37,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
   /**
    * The hashmap containing the dictionary.
    */
-  private final Map<List<String>, String> dictMap;
+  private final Map<List<String>, List<String>> dictMap;
 
   /**
    * Construct a hashmap from the input tab separated dictionary.
@@ -47,26 +47,24 @@ public class DictionaryLemmatizer implements Lemmatizer {
    * @param dictionary
    *          the input dictionary via inputstream
    */
-  public DictionaryLemmatizer(final InputStream dictionary) {
+  public DictionaryLemmatizer(final InputStream dictionary) throws IOException {
     this.dictMap = new HashMap<>();
-    final BufferedReader breader = new BufferedReader(new InputStreamReader(dictionary));
+    final BufferedReader breader = new BufferedReader(
+        new InputStreamReader(dictionary));
     String line;
-    try {
-      while ((line = breader.readLine()) != null) {
-        final String[] elems = line.split("\t");
-        this.dictMap.put(Arrays.asList(elems[0], elems[1]), elems[2]);
-      }
-    } catch (final IOException e) {
-      e.printStackTrace();
+    while ((line = breader.readLine()) != null) {
+      final String[] elems = line.split("\t");
+      this.dictMap.put(Arrays.asList(elems[0], elems[1]), Arrays.asList(elems[2]));
     }
   }
 
+
   /**
    * Get the Map containing the dictionary.
    *
    * @return dictMap the Map
    */
-  public Map<List<String>, String> getDictMap() {
+  public Map<List<String>, List<String>> getDictMap() {
     return this.dictMap;
   }
 
@@ -85,31 +83,65 @@ public class DictionaryLemmatizer implements Lemmatizer {
     return keys;
   }
 
+
   public String[] lemmatize(final String[] tokens, final String[] postags) {
     List<String> lemmas = new ArrayList<>();
     for (int i = 0; i < tokens.length; i++) {
-      lemmas.add(this.apply(tokens[i], postags[i]));
+      lemmas.add(this.lemmatize(tokens[i], postags[i]));
     }
     return lemmas.toArray(new String[lemmas.size()]);
   }
 
+  public List<List<String>> lemmatize(final List<String> tokens, final List<String> posTags) {
+    List<List<String>> allLemmas = new ArrayList<>();
+    for (int i = 0; i < tokens.size(); i++) {
+      allLemmas.add(this.getAllLemmas(tokens.get(i), posTags.get(i)));
+    }
+    return allLemmas;
+  }
+
   /**
    * Lookup lemma in a dictionary. Outputs "O" if not found.
-   * @param word the token
-   * @param postag the postag
+   *
+   * @param word
+   *          the token
+   * @param postag
+   *          the postag
    * @return the lemma
    */
-  public String apply(final String word, final String postag) {
+  private String lemmatize(final String word, final String postag) {
     String lemma;
     final List<String> keys = this.getDictKeys(word, postag);
     // lookup lemma as value of the map
-    final String keyValue = this.dictMap.get(keys);
-    if (keyValue != null) {
-      lemma = keyValue;
+    final List<String> keyValues = this.dictMap.get(keys);
+    if (!keyValues.isEmpty()) {
+      lemma = keyValues.get(0);
     } else {
       lemma = "O";
     }
     return lemma;
   }
-}
 
+  /**
+   * Lookup every lemma for a word,pos tag in a dictionary. Outputs "O" if not
+   * found.
+   *
+   * @param word
+   *          the token
+   * @param postag
+   *          the postag
+   * @return every lemma
+   */
+  private List<String> getAllLemmas(final String word, final String postag) {
+    List<String> lemmasList = new ArrayList<>();
+    final List<String> keys = this.getDictKeys(word, postag);
+    // lookup lemma as value of the map
+    final List<String> keyValues = this.dictMap.get(keys);
+    if (!keyValues.isEmpty()) {
+      lemmasList.addAll(keyValues);
+    } else {
+      lemmasList.add("O");
+    }
+    return lemmasList;
+  }
+}
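
Taken together, the constructor above parses a tab-separated dictionary (token, postag and lemma per line) and the class now offers both a single-lemma and an all-lemmas lookup. A small self-contained sketch, assuming illustrative in-memory entries (the class name and the data are not part of the commit):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import opennlp.tools.lemmatizer.DictionaryLemmatizer;

public class DictionaryLemmatizerSketch {

  public static void main(String[] args) throws IOException {
    // One dictionary entry per line: token<TAB>postag<TAB>lemma.
    String entries = "is\tVBZ\tbe\n" + "reading\tVBG\tread\n";

    DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(
        new ByteArrayInputStream(entries.getBytes(StandardCharsets.UTF_8)));

    String[] tokens = {"is", "reading"};
    String[] tags = {"VBZ", "VBG"};

    // One lemma per token via the String[] based method: [be, read]
    String[] lemmas = lemmatizer.lemmatize(tokens, tags);

    // Every dictionary lemma per token via the new List based method: [[be], [read]]
    List<List<String>> allLemmas =
        lemmatizer.lemmatize(Arrays.asList(tokens), Arrays.asList(tags));

    System.out.println(Arrays.toString(lemmas));
    System.out.println(allLemmas);
  }
}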

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
index fc1a558..a8d71e8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
@@ -49,7 +49,7 @@ public class LemmaSampleEventStream extends AbstractEventStream<LemmaSample> {
       List<Event> events = new ArrayList<>();
       String[] toksArray = sample.getTokens();
       String[] tagsArray = sample.getTags();
-      String[] lemmasArray = sample.getLemmas();
+      String[] lemmasArray = LemmatizerME.encodeLemmas(toksArray,sample.getLemmas());
       for (int ei = 0, el = sample.getTokens().length; ei < el; ei++) {
         events.add(new Event(lemmasArray[ei],
             contextGenerator.getContext(ei,toksArray,tagsArray,lemmasArray)));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
index 0a133c3..9c661a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
@@ -23,7 +23,6 @@ import java.util.List;
 
 import opennlp.tools.util.FilterObjectStream;
 import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.StringUtil;
 
 
 /**
@@ -51,8 +50,7 @@ public class LemmaSampleStream extends FilterObjectStream<String, LemmaSample> {
       else {
         toks.add(parts[0]);
         tags.add(parts[1]);
-        String ses = StringUtil.getShortestEditScript(parts[0], parts[2]);
-        preds.add(ses);
+        preds.add(parts[2]);
       }
     }
     if (toks.size() > 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index f21f9e3..933eec1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -17,19 +17,31 @@
 
 package opennlp.tools.lemmatizer;
 
+import java.util.List;
+
 /**
  * The interface for lemmatizers.
  */
 public interface Lemmatizer {
 
   /**
-   * Generates lemma tags for the word and postag returning the result in an array.
+   * Generates lemmas for the word and postag returning the result in an array.
    *
    * @param toks an array of the tokens
    * @param tags an array of the pos tags
    *
-   * @return an array of lemma classes for each token in the sequence.
+   * @return an array of possible lemmas for each token in the sequence.
    */
   String[] lemmatize(String[] toks, String[] tags);
 
+  /**
+   * Generates lemma tags for the word and postag, returning the result in a list
+   * of every possible lemma for each token and postag.
+   *
+   * @param toks an array of the tokens
+   * @param tags an array of the pos tags
+   * @return a list of every possible lemma for each token in the sequence.
+   */
+  List<List<String>> lemmatize(List<String> toks, List<String> tags);
+
 }
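
With both overloads declared on the Lemmatizer interface, calling code can be written against the interface and used unchanged with a DictionaryLemmatizer or a LemmatizerME. A hedged sketch of such a caller (the helper names are illustrative):

import java.util.Arrays;
import java.util.List;

import opennlp.tools.lemmatizer.Lemmatizer;

public final class LemmatizerClientSketch {

  // Single lemma per token: the best sequence for LemmatizerME,
  // the first dictionary entry for DictionaryLemmatizer.
  static String[] bestLemmas(Lemmatizer lemmatizer, String[] tokens, String[] tags) {
    return lemmatizer.lemmatize(tokens, tags);
  }

  // Every candidate lemma per token, whatever the implementation.
  static List<List<String>> allLemmas(Lemmatizer lemmatizer, String[] tokens, String[] tags) {
    return lemmatizer.lemmatize(Arrays.asList(tokens), Arrays.asList(tags));
  }
}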

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
index 4855fda..2b8122f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
@@ -19,6 +19,7 @@ package opennlp.tools.lemmatizer;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -47,6 +48,7 @@ import opennlp.tools.util.TrainingParameters;
  */
 public class LemmatizerME implements Lemmatizer {
 
+  public static final int LEMMA_NUMBER = 29;
   public static final int DEFAULT_BEAM_SIZE = 3;
   protected int beamSize;
   private Sequence bestSequence;
@@ -86,9 +88,52 @@ public class LemmatizerME implements Lemmatizer {
   }
 
   public String[] lemmatize(String[] toks, String[] tags) {
+    String[] ses = predictSES(toks, tags);
+    String[] lemmas = decodeLemmas(toks, ses);
+    return lemmas;
+  }
+
+  @Override public List<List<String>> lemmatize(List<String> toks,
+      List<String> tags) {
+    String[] tokens = toks.toArray(new String[toks.size()]);
+    String[] posTags = tags.toArray(new String[tags.size()]);
+    String[][] allLemmas = predictLemmas(LEMMA_NUMBER, tokens, posTags);
+    List<List<String>> predictedLemmas = new ArrayList<>();
+    for (int i = 0; i < allLemmas.length; i++) {
+      predictedLemmas.add(Arrays.asList(allLemmas[i]));
+    }
+    return predictedLemmas;
+  }
+
+  /**
+   * Predict Short Edit Script (automatically induced lemma class).
+   * @param toks the array of tokens
+   * @param tags the array of pos tags
+   * @return an array containing the lemma classes
+   */
+  public String[] predictSES(String[] toks, String[] tags) {
     bestSequence = model.bestSequence(toks, new Object[] {tags}, contextGenerator, sequenceValidator);
-    List<String> c = bestSequence.getOutcomes();
-    return c.toArray(new String[c.size()]);
+    List<String> ses = bestSequence.getOutcomes();
+    return ses.toArray(new String[ses.size()]);
+  }
+
+  /**
+   * Predict all possible lemmas (using a default upper bound).
+   * @param numLemmas the default number of lemmas
+   * @param toks the tokens
+   * @param tags the postags
+   * @return a two-dimensional array containing all possible lemmas for each token and postag pair
+   */
+  public String[][] predictLemmas(int numLemmas, String[] toks, String[] tags) {
+    Sequence[] bestSequences = model.bestSequences(numLemmas, toks, new Object[] {tags},
+            contextGenerator, sequenceValidator);
+    String[][] allLemmas = new String[bestSequences.length][];
+    for (int i = 0; i < allLemmas.length; i++) {
+      List<String> ses = bestSequences[i].getOutcomes();
+      String[] sesArray = ses.toArray(new String[ses.size()]);
+      allLemmas[i] = decodeLemmas(toks,sesArray);
+    }
+    return allLemmas;
   }
 
   /**
@@ -97,11 +142,10 @@ public class LemmatizerME implements Lemmatizer {
    * @param preds the predicted lemma classes
    * @return the array of decoded lemmas
    */
-  public String[] decodeLemmas(String[] toks, String[] preds) {
+  public static String[] decodeLemmas(String[] toks, String[] preds) {
     List<String> lemmas = new ArrayList<>();
     for (int i = 0; i < toks.length; i++) {
       String lemma = StringUtil.decodeShortestEditScript(toks[i].toLowerCase(), preds[i]);
-      //System.err.println("-> DEBUG: " + toks[i].toLowerCase() + " " + preds[i] + " " + lemma);
       if (lemma.length() == 0) {
         lemma = "_";
       }
@@ -110,6 +154,18 @@ public class LemmatizerME implements Lemmatizer {
     return lemmas.toArray(new String[lemmas.size()]);
   }
 
+  public static String[] encodeLemmas(String[] toks, String[] lemmas) {
+    List<String> sesList = new ArrayList<>();
+    for (int i = 0; i < toks.length; i++) {
+      String ses = StringUtil.getShortestEditScript(toks[i], lemmas[i]);
+      if (ses.length() == 0) {
+        ses = "_";
+      }
+      sesList.add(ses);
+    }
+    return sesList.toArray(new String[sesList.size()]);
+  }
+
   public Sequence[] topKSequences(String[] sentence, String[] tags) {
     return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
         new Object[] { tags }, contextGenerator, sequenceValidator);

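Because decodeLemmas and the new encodeLemmas are both static, the shortest-edit-script
encoding can be sanity-checked in isolation. A minimal sketch (not part of the commit),
using lower-case tokens because decodeLemmas lower-cases its input:

  import java.util.Arrays;

  import opennlp.tools.lemmatizer.LemmatizerME;

  public class SesRoundTrip {
    public static void main(String[] args) {
      String[] tokens = {"running", "geese", "is"};
      String[] lemmas = {"run", "goose", "be"};

      // encode each token/lemma pair as a shortest edit script (the induced lemma class)
      String[] ses = LemmatizerME.encodeLemmas(tokens, lemmas);

      // applying the scripts to the same tokens should reproduce the lemmas
      String[] decoded = LemmatizerME.decodeLemmas(tokens, ses);

      System.out.println(Arrays.toString(ses));
      System.out.println(Arrays.equals(lemmas, decoded));  // expected: true
    }
  }
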
http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
index 489ba38..dcfc883 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
@@ -19,6 +19,7 @@ package opennlp.tools.lemmatizer;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.List;
 
 /**
  * This dummy lemmatizer implementation simulates a LemmatizerME. The file has
@@ -56,4 +57,10 @@ public class DummyLemmatizer implements Lemmatizer {
     }
   }
 
+  @Override
+  public List<List<String>> lemmatize(List<String> toks,
+      List<String> tags) {
+    return null;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d3c16d53/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
index 76b4cd5..97dcc3c 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java
@@ -82,8 +82,7 @@ public class LemmatizerMETest {
   @Test
   public void testLemmasAsArray() throws Exception {
 
-    String[] preds = lemmatizer.lemmatize(tokens, postags);
-    String[] lemmas = lemmatizer.decodeLemmas(tokens, preds);
+    String[] lemmas = lemmatizer.lemmatize(tokens, postags);
 
     Assert.assertArrayEquals(expect, lemmas);
   }


[30/50] [abbrv] opennlp git commit: OPENNLP-989: Fix validation of CONT after START with different type

Posted by jo...@apache.org.
OPENNLP-989: Fix validation of CONT after START with different type

This closes #126


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b78abfbb
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b78abfbb
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b78abfbb

Branch: refs/heads/parser_regression
Commit: b78abfbbba5f26b32dd55ca856f124c659bca758
Parents: 20d0a76
Author: Peter Thygesen <pe...@gmail.com>
Authored: Fri Feb 17 15:17:13 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/namefind/NameFinderSequenceValidator.java  | 5 +++--
 .../test/java/opennlp/tools/eval/Conll02NameFinderEval.java  | 8 ++++----
 .../tools/namefind/NameFinderSequenceValidatorTest.java      | 2 --
 3 files changed, 7 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/b78abfbb/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
index d42e8c5..5143468 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
@@ -35,8 +35,9 @@ public class NameFinderSequenceValidator implements
         return false;
       } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
         return false;
-      } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE)) {
-        // if it is continue, we have to check if previous match was of the same type
+      } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
+          outcomesSequence[li].endsWith(NameFinderME.START)) {
+        // if it is continue or start, we have to check if previous match was of the same type
         String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
         String nameType = NameFinderME.extractNameType(outcome);
         if (previousNameType != null || nameType != null ) {

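A small sketch (not part of the commit) of the behaviour this fixes, assuming the usual
"<type>-start" / "<type>-cont" outcome labels produced by the BIO encoding:

  import opennlp.tools.namefind.NameFinderSequenceValidator;

  public class ValidatorSketch {
    public static void main(String[] args) {
      NameFinderSequenceValidator validator = new NameFinderSequenceValidator();
      String[] tokens = {"John", "Smith"};

      // continuing a name of the same type is still allowed
      boolean sameType = validator.validSequence(
          1, tokens, new String[] {"person-start"}, "person-cont");

      // with this fix, a continuation after a start of a different type is rejected
      boolean differentType = validator.validSequence(
          1, tokens, new String[] {"person-start"}, "organization-cont");

      System.out.println(sameType + " " + differentType);  // expected: true false
    }
  }
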
http://git-wip-us.apache.org/repos/asf/opennlp/blob/b78abfbb/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
index d1a71cf..abe53aa 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java
@@ -287,9 +287,9 @@ public class Conll02NameFinderEval {
     TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params,
         combinedType);
 
-    eval(maxentModel, dutchTestAFile, LANGUAGE.NL,   combinedType, 0.6728164867517175d);
+    eval(maxentModel, dutchTestAFile, LANGUAGE.NL,   combinedType, 0.6673209028459275d);
 
-    eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6985893619774816d);
+    eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6984085910208306d);
   }
 
   @Test
@@ -507,9 +507,9 @@ public class Conll02NameFinderEval {
     TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params,
         combinedType);
 
-    eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.706765154179857d);
+    eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.707400023454908d);
 
-    eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7583580194667795d);
+    eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7576868829337094d);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b78abfbb/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
index 35752c1..1b2f6ed 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
@@ -17,7 +17,6 @@
 package opennlp.tools.namefind;
 
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -55,7 +54,6 @@ public class NameFinderSequenceValidatorTest {
 
   }
 
-  @Ignore
   @Test
   public void testContinueAfterStartAndNotSameType() {
 


[18/50] [abbrv] opennlp git commit: OPENNLP-176: Switch language codes to ISO-639-3

Posted by jo...@apache.org.
OPENNLP-176: Switch language codes to ISO-639-3

This closes #114


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/eee42316
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/eee42316
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/eee42316

Branch: refs/heads/parser_regression
Commit: eee423166308c454dc176d2d52b12c29e2a08f19
Parents: fdff127
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Jan 29 11:06:08 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .../cmdline/namefind/CensusDictionaryCreatorTool.java     |  2 +-
 .../opennlp/tools/cmdline/parser/ParserTrainerTool.java   |  4 ++--
 .../tools/formats/AbstractSampleStreamFactory.java        |  2 +-
 .../tools/formats/Conll03NameSampleStreamFactory.java     |  6 +++---
 .../main/java/opennlp/tools/sentdetect/lang/Factory.java  | 10 +++++-----
 5 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6042510..f9bf5e0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool {
   interface Parameters {
 
     @ParameterDescription(valueName = "code")
-    @OptionalParameter(defaultValue = "en")
+    @OptionalParameter(defaultValue = "eng")
     String getLang();
 
     @ParameterDescription(valueName = "charsetName")

http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 3a8dd5a..2709fd5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT
               params.getHeadRulesSerializerImpl());
     }
     else {
-      if ("en".equals(params.getLang())) {
+      if ("en".equals(params.getLang()) || "eng".equals(params.getLang())) {
         headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer();
       }
-      else if ("es".equals(params.getLang())) {
+      else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) {
         headRulesSerializer = new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules.HeadRulesSerializer();
       }
       else {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
index 6a7690e..33d0f95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
@@ -34,7 +34,7 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact
   }
 
   public String getLang() {
-    return "en";
+    return "eng";
   }
 
   @SuppressWarnings({"unchecked"})

http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
index 878565f..599d48a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.ObjectStream;
 public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
 
   interface Parameters extends BasicFormatParams {
-    @ParameterDescription(valueName = "en|de")
+    @ParameterDescription(valueName = "eng|deu")
     String getLang();
 
     @ParameterDescription(valueName = "per,loc,org,misc")
@@ -54,11 +54,11 @@ public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<
 
     // TODO: support the other languages with this CoNLL.
     LANGUAGE lang;
-    if ("en".equals(params.getLang())) {
+    if ("eng".equals(params.getLang())) {
       lang = LANGUAGE.EN;
       language = params.getLang();
     }
-    else if ("de".equals(params.getLang())) {
+    else if ("deu".equals(params.getLang())) {
       lang = LANGUAGE.DE;
       language = params.getLang();
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/eee42316/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
index 28b515b..4a34229 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
@@ -49,9 +49,9 @@ public class Factory {
 
   public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
 
-    if ("th".equals(languageCode)) {
+    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
       return new SentenceContextGenerator();
-    } else if ("pt".equals(languageCode)) {
+    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
       return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
     }
 
@@ -68,11 +68,11 @@ public class Factory {
   }
 
   public char[] getEOSCharacters(String languageCode) {
-    if ("th".equals(languageCode)) {
+    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
       return thEosCharacters;
-    } else if ("pt".equals(languageCode)) {
+    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
       return ptEosCharacters;
-    } else if ("jp".equals(languageCode)) {
+    } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) {
       return jpEosCharacters;
     }
 

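A minimal sketch (not part of the commit) showing that a two-letter code and its new
ISO-639-3 form now select the same language-specific defaults:

  import opennlp.tools.sentdetect.lang.Factory;

  public class EosCharactersSketch {
    public static void main(String[] args) {
      Factory factory = new Factory();

      // both spellings resolve to the Portuguese end-of-sentence characters
      char[] pt = factory.getEOSCharacters("pt");
      char[] por = factory.getEOSCharacters("por");

      System.out.println(new String(pt).equals(new String(por)));  // expected: true
    }
  }
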

[10/50] [abbrv] opennlp git commit: OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118.

Posted by jo...@apache.org.
OpenNLP-981: Add training stream hash to AbstractEventTrainer. This closes #118.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cd23b58a
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cd23b58a
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cd23b58a

Branch: refs/heads/parser_regression
Commit: cd23b58a3c04053d8c6cafa761aa0fc533774304
Parents: daa9fca
Author: Daniel Russ <dr...@mail.nih.gov>
Authored: Thu Feb 9 09:56:12 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/ml/AbstractEventTrainer.java     |  1 +
 .../ml/perceptron/PerceptronPrepAttachTest.java    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
index c465f88..bb11aaa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/AbstractEventTrainer.java
@@ -88,6 +88,7 @@ public abstract class AbstractEventTrainer extends AbstractTrainer implements Ev
     HashSumEventStream hses = new HashSumEventStream(events);
     DataIndexer indexer = getDataIndexer(hses);
 
+    addToReport("Training-Eventhash", hses.calculateHashSum().toString(16));
     return train(indexer);
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/cd23b58a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
index d4d70ca..eda49f8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
@@ -23,6 +23,7 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.Map;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -134,4 +135,20 @@ public class PerceptronPrepAttachTest {
     Assert.assertEquals(modelA, modelB);
     Assert.assertEquals(modelA.hashCode(), modelB.hashCode());
   }
+  
+  @Test
+  public void verifyReportMap() throws IOException {
+    TrainingParameters trainParams = new TrainingParameters();
+    trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
+    trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
+    // Since we are verifying the report map, we don't need to have more than 1 iteration
+    trainParams.put(AbstractTrainer.ITERATIONS_PARAM, Integer.toString(1));
+    trainParams.put("UseSkippedAveraging", Boolean.toString(true));
+    
+    Map<String,String> reportMap = new HashMap<>();
+    EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, reportMap);
+    trainer.train(PrepAttachDataUtil.createTrainingStream());
+    Assert.assertTrue("Report Map does not contain the training event hash",
+        reportMap.containsKey("Training-Eventhash")); 
+  }
 }

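Beyond the assertion above, the hash can also be read back directly from the report map.
A short sketch (not part of the commit), with trainParams and the training stream taken
from the test:

  Map<String, String> reportMap = new HashMap<>();
  EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, reportMap);
  trainer.train(PrepAttachDataUtil.createTrainingStream());
  // hex digest over the training events, handy for regression tracking
  String eventHash = reportMap.get("Training-Eventhash");
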

[11/50] [abbrv] opennlp git commit: OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model

Posted by jo...@apache.org.
OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b41fcd69
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b41fcd69
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b41fcd69

Branch: refs/heads/parser_regression
Commit: b41fcd69baef80ed1e99656e9a3b7424aa294bb8
Parents: a2049d6
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Feb 2 19:13:02 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 .../tagdict/MorfologikPOSTaggerFactory.java     | 15 +------
 .../tools/namefind/TokenNameFinderModel.java    | 14 +-----
 .../opennlp/tools/util/model/BaseModel.java     |  2 +
 .../tools/util/model/ByteArraySerializer.java   | 33 ++++++++++++++
 .../util/model/ByteArraySerializerTest.java     | 45 ++++++++++++++++++++
 5 files changed, 82 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
index 370b4d0..592ef7d 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Map;
@@ -33,7 +32,7 @@ import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.TagDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
 
@@ -150,16 +149,4 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
             info));
     return new MorfologikTagDictionary(dict);
   }
-
-  static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
-
-    public byte[] create(InputStream in) throws IOException {
-      return ModelUtil.read(in);
-    }
-
-    public void serialize(byte[] artifact, OutputStream out) throws IOException {
-      out.write(artifact);
-    }
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index 05a3615..09eefc5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -21,7 +21,6 @@ package opennlp.tools.namefind;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.URL;
 import java.util.Map;
 import java.util.Properties;
@@ -36,7 +35,7 @@ import opennlp.tools.util.featuregen.BrownCluster;
 import opennlp.tools.util.featuregen.WordClusterDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 /**
  * The {@link TokenNameFinderModel} is the model used
@@ -53,17 +52,6 @@ public class TokenNameFinderModel extends BaseModel {
     }
   }
 
-  private static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
-
-    public byte[] create(InputStream in) throws IOException {
-      return ModelUtil.read(in);
-    }
-
-    public void serialize(byte[] artifact, OutputStream out) throws IOException {
-      out.write(artifact);
-    }
-  }
-
   private static final String COMPONENT_NAME = "NameFinderME";
   private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model";
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
index 062c787..20acd9d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
@@ -351,6 +351,8 @@ public abstract class BaseModel implements ArtifactProvider, Serializable {
     GenericModelSerializer.register(serializers);
     PropertiesSerializer.register(serializers);
     DictionarySerializer.register(serializers);
+    serializers.put("txt", new ByteArraySerializer());
+    serializers.put("html", new ByteArraySerializer());
 
     return serializers;
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
new file mode 100644
index 0000000..aa123c4
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.model;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public class ByteArraySerializer implements ArtifactSerializer<byte[]> {
+
+  public byte[] create(InputStream in) throws IOException {
+    return ModelUtil.read(in);
+  }
+
+  public void serialize(byte[] artifact, OutputStream out) throws IOException {
+    out.write(artifact);
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
new file mode 100644
index 0000000..a0d7a35
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ByteArraySerializerTest {
+
+  @Test
+  public void testSerialization() throws IOException {
+
+    byte[] b = new byte[1024];
+    new Random(23).nextBytes(b);
+
+    ByteArraySerializer serializer = new ByteArraySerializer();
+
+    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
+    serializer.serialize(Arrays.copyOf(b, b.length), bOut) ;
+
+    Assert.assertArrayEquals(b, bOut.toByteArray());
+    Assert.assertArrayEquals(b, serializer.create(new ByteArrayInputStream(b)));
+  }
+}


[19/50] [abbrv] opennlp git commit: NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128

Posted by jo...@apache.org.
NoJira: Adding public RepoToken to investigate Travis coveralls build failures, this closes apache/opennlp#128


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc3b12fa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc3b12fa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc3b12fa

Branch: refs/heads/parser_regression
Commit: fc3b12fa42c58b1065a621dddd50cb4831ad56ec
Parents: 41f153a
Author: smarthi <sm...@apache.org>
Authored: Thu Feb 16 00:17:53 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .travis.yml | 6 ++++++
 pom.xml     | 3 +++
 2 files changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index b4c83ad..49d902e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,12 @@ jdk: oraclejdk8
 
 sudo: false
 
+env:
+  global:
+   # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created
+   # via the "travis encrypt" command using the project repo's public key
+   - secure: "WLRKO/tD2rFN+a/HKSf9iZkaMaFE8/luXcJCXGfewoHysF7LgIJ76AN9HY50woVJykl+T/tEhK5c/+H/IKO5zH8Rvz/Q9XxPTvUTOwH7oFOHCQ66mKTvn27Z4fp+JbkPKJuhWDUzPvS/Alo3wE70UELnFRTFoRsemfNNa95uPJobfx5deOfX80mipHOn16dA1q8LuzQa6iF2HIVuh7ygLleTV0cDJyXmIlg3EbKGEitozIv0WkwALrBjLS7KmCcXTKxXqCm1Be2MFRoh9ab2bEooXlv2zRh2wT0c04RckFm1AJGpGQelXLl3NxxcRJSpIN9OTkpVUfwm28TIXk2SzdgPMrP11yFK/DPKTv0jwyk1bFrmZMMso5Y2rP6wjNEtw5ExYSpk3xebcieLJwXhCwkkWAT3DdAAeXO5z4Nf36lryjRgqvlsVF1ofqAK5Sh+qH93/TJOE+hVEj74xUT9pVaxemY61ymvSt8L21XkUsp8T5ILq9jWoaMQCaAwZIaJiHXYjQhmsrFRkNaY4cl9AUGwpHmm750uqhmoVfuJzQg5/vGMZ0LWeCgR9qsG5MG0yijE8ghExUOe7R4gcNAJW2XOfjzMTy74jdsJbsJPUeci/R4wzrXTSCQVJ5nj2LhBF6HyqPyUrIV2MB14gAIItc1LASuB1GLkGoXjIdt0HN8="
+
 cache:
   directories:
     - $HOME/.m2

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc3b12fa/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 268a54e..8e37452 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,6 +214,9 @@
 					<groupId>org.eluder.coveralls</groupId>
 					<artifactId>coveralls-maven-plugin</artifactId>
 					<version>${coveralls.maven.plugin}</version>
+					<configuration>
+						<repoToken>BD8e0j90KZlQdko7H3wEo5a0mTLhmoeyk</repoToken>
+					</configuration>
 				</plugin>
 
 				<plugin>


[43/50] [abbrv] opennlp git commit: this closes apache/opennlp#146

Posted by jo...@apache.org.
this closes apache/opennlp#146


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/178aeb34
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/178aeb34
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/178aeb34

Branch: refs/heads/parser_regression
Commit: 178aeb34763e66ce6a3fd1097a5e95cdfe90d140
Parents: 36de013
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Sat Mar 25 08:17:30 2017 -0400
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 opennlp-docs/src/docbkx/namefinder.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/178aeb34/opennlp-docs/src/docbkx/namefinder.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/src/docbkx/namefinder.xml b/opennlp-docs/src/docbkx/namefinder.xml
index f0f9a44..1ecb13c 100644
--- a/opennlp-docs/src/docbkx/namefinder.xml
+++ b/opennlp-docs/src/docbkx/namefinder.xml
@@ -388,7 +388,7 @@ new NameFinderME(model);]]>
 </generators>]]>
 				 </programlisting>
 		    The root element must be generators, each sub-element adds a feature generator to the configuration.
-		    The sample xml is constains aditional feature generators with respect to the API defined above.
+		    The sample xml is constains additional feature generators with respect to the API defined above.
 			</para>
 			<para>
 			The following table shows the supported elements:


[45/50] [abbrv] opennlp git commit: OPENNLP-1006: Refactor usage of tag constants in sequence validators

Posted by jo...@apache.org.
OPENNLP-1006: Refactor usage of tag constants in sequence validators


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8abe90d3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8abe90d3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8abe90d3

Branch: refs/heads/parser_regression
Commit: 8abe90d3f79f4e8bd8da0780bb8368b018aee64b
Parents: 81b07ec
Author: Peter Thygesen <pe...@gmail.com>
Authored: Tue Mar 28 16:59:34 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../src/main/java/opennlp/tools/namefind/BioCodec.java   | 10 +++++-----
 .../tools/namefind/NameFinderSequenceValidator.java      | 11 +++++++----
 2 files changed, 12 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
index 2218021..c0570a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
@@ -118,13 +118,13 @@ public class BioCodec implements SequenceCodec<String> {
 
     for (int i = 0; i < outcomes.length; i++) {
       String outcome = outcomes[i];
-      if (outcome.endsWith(NameFinderME.START)) {
+      if (outcome.endsWith(BioCodec.START)) {
         start.add(outcome.substring(0, outcome.length()
-            - NameFinderME.START.length()));
-      } else if (outcome.endsWith(NameFinderME.CONTINUE)) {
+            - BioCodec.START.length()));
+      } else if (outcome.endsWith(BioCodec.CONTINUE)) {
         cont.add(outcome.substring(0, outcome.length()
-            - NameFinderME.CONTINUE.length()));
-      } else if (!outcome.equals(NameFinderME.OTHER)) {
+            - BioCodec.CONTINUE.length()));
+      } else if (!outcome.equals(BioCodec.OTHER)) {
         // got unexpected outcome
         return false;
       }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/8abe90d3/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
index 5143468..bb6700e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
@@ -19,6 +19,9 @@ package opennlp.tools.namefind;
 
 import opennlp.tools.util.SequenceValidator;
 
+/**
+ * This class is created by the {@link BioCodec}.
+ */
 public class NameFinderSequenceValidator implements
     SequenceValidator<String> {
 
@@ -27,16 +30,16 @@ public class NameFinderSequenceValidator implements
 
     // outcome is formatted like "cont" or "sometype-cont", so we
     // can check if it ends with "cont".
-    if (outcome.endsWith(NameFinderME.CONTINUE)) {
+    if (outcome.endsWith(BioCodec.CONTINUE)) {
 
       int li = outcomesSequence.length - 1;
 
       if (li == -1) {
         return false;
-      } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
+      } else if (outcomesSequence[li].endsWith(BioCodec.OTHER)) {
         return false;
-      } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
-          outcomesSequence[li].endsWith(NameFinderME.START)) {
+      } else if (outcomesSequence[li].endsWith(BioCodec.CONTINUE) ||
+          outcomesSequence[li].endsWith(BioCodec.START)) {
         // if it is continue or start, we have to check if previous match was of the same type
         String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
         String nameType = NameFinderME.extractNameType(outcome);


[03/50] [abbrv] opennlp git commit: OPENNLP-973: Respect setting of PrintMessages in DataIndexer impls

Posted by jo...@apache.org.
OPENNLP-973: Respect setting of PrintMessages in DataIndexer impls


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/8b479305
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/8b479305
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/8b479305

Branch: refs/heads/parser_regression
Commit: 8b479305453548daa3d2ac5e67ed6a51e805487c
Parents: a52e5af
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed Feb 1 15:38:19 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 .../tools/ml/model/AbstractDataIndexer.java     |  2 +-
 .../tools/ml/model/OnePassDataIndexer.java      | 18 ++++++++---------
 .../tools/ml/model/TwoPassDataIndexer.java      | 21 ++++++++++----------
 3 files changed, 21 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/8b479305/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java
index 0eabf87..be6a6e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java
@@ -133,7 +133,7 @@ public abstract class AbstractDataIndexer implements DataIndexer {
       throw new InsufficientTrainingDataException("Insufficient training data to create model.");
     }
 
-    if (sort) System.out.println("done. Reduced " + numEvents + " events to " + numUniqueEvents + ".");
+    if (sort) display("done. Reduced " + numEvents + " events to " + numUniqueEvents + ".\n");
 
     contexts = new int[numUniqueEvents][];
     outcomeList = new int[numUniqueEvents];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/8b479305/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java
index 7b53251..cf8fb7f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java
@@ -48,24 +48,24 @@ public class OnePassDataIndexer extends AbstractDataIndexer {
     List<Event> events;
     List<ComparableEvent> eventsToCompare;
 
-    System.out.println("Indexing events using cutoff of " + cutoff + "\n");
+    display("Indexing events using cutoff of " + cutoff + "\n\n");
 
-    System.out.print("\tComputing event counts...  ");
+    display("\tComputing event counts...  ");
     events = computeEventCounts(eventStream, predicateIndex, cutoff);
-    System.out.println("done. " + events.size() + " events");
+    display("done. " + events.size() + " events\n");
 
-    System.out.print("\tIndexing...  ");
+    display("\tIndexing...  ");
     eventsToCompare = index(events, predicateIndex);
     // done with event list
     events = null;
     // done with predicates
     predicateIndex = null;
 
-    System.out.println("done.");
+    display("done.\n");
 
-    System.out.print("Sorting and merging events... ");
+    display("Sorting and merging events... ");
     sortAndMerge(eventsToCompare, sort);
-    System.out.println("Done indexing.");
+    display("Done indexing.\n");
   }
 
   /**
@@ -140,8 +140,8 @@ public class OnePassDataIndexer extends AbstractDataIndexer {
         ce = new ComparableEvent(ocID, cons);
         eventsToCompare.add(ce);
       } else {
-        System.err.println("Dropped event " + ev.getOutcome() + ":"
-            + Arrays.asList(ev.getContext()));
+        display("Dropped event " + ev.getOutcome() + ":"
+            + Arrays.asList(ev.getContext()) + "\n");
       }
       // recycle the TIntArrayList
       indexedContext.clear();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/8b479305/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java
index 133c350..b3cc89b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java
@@ -55,17 +55,17 @@ public class TwoPassDataIndexer extends AbstractDataIndexer {
     Map<String,Integer> predicateIndex = new HashMap<>();
     List<ComparableEvent> eventsToCompare;
 
-    System.out.println("Indexing events using cutoff of " + cutoff + "\n");
+    display("Indexing events using cutoff of " + cutoff + "\n\n");
 
-    System.out.print("\tComputing event counts...  ");
+    display("\tComputing event counts...  ");
 
     File tmp = File.createTempFile("events", null);
     tmp.deleteOnExit();
     Writer osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp),"UTF8"));
     int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff);
-    System.out.println("done. " + numEvents + " events");
+    display("done. " + numEvents + " events\n");
 
-    System.out.print("\tIndexing...  ");
+    display("\tIndexing...  ");
 
     try (FileEventStream fes = new FileEventStream(tmp)) {
       eventsToCompare = index(numEvents, fes, predicateIndex);
@@ -73,16 +73,16 @@ public class TwoPassDataIndexer extends AbstractDataIndexer {
     // done with predicates
     predicateIndex = null;
     tmp.delete();
-    System.out.println("done.");
+    display("done.\n");
 
     if (sort) {
-      System.out.print("Sorting and merging events... ");
+      display("Sorting and merging events... ");
     }
     else {
-      System.out.print("Collecting events... ");
+      display("Collecting events... ");
     }
     sortAndMerge(eventsToCompare,sort);
-    System.out.println("Done indexing.");
+    display("Done indexing.\n");
 
   }
   /**
@@ -120,9 +120,11 @@ public class TwoPassDataIndexer extends AbstractDataIndexer {
     return eventCount;
   }
 
+  // TODO: merge this code with the copy and paste version in OnePassDataIndexer
   private List<ComparableEvent> index(int numEvents, ObjectStream<Event> es,
       Map<String,Integer> predicateIndex) throws IOException {
     Map<String,Integer> omap = new HashMap<>();
+
     int outcomeCount = 0;
     List<ComparableEvent> eventsToCompare = new ArrayList<>(numEvents);
     List<Integer> indexedContext = new ArrayList<>();
@@ -159,7 +161,7 @@ public class TwoPassDataIndexer extends AbstractDataIndexer {
         eventsToCompare.add(ce);
       }
       else {
-        System.err.println("Dropped event " + ev.getOutcome() + ":" + Arrays.asList(ev.getContext()));
+        display("Dropped event " + ev.getOutcome() + ":" + Arrays.asList(ev.getContext()) + "\n");
       }
       // recycle the TIntArrayList
       indexedContext.clear();
@@ -168,6 +170,5 @@ public class TwoPassDataIndexer extends AbstractDataIndexer {
     predLabels = toIndexedStringArray(predicateIndex);
     return eventsToCompare;
   }
-
 }
 

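With the indexers now routing their progress output through display(), the messages can be
suppressed via the training parameters. A sketch (not part of the commit), assuming the
verbosity key is the "PrintMessages" parameter named in the issue title:

  import java.util.HashMap;

  import opennlp.tools.ml.EventTrainer;
  import opennlp.tools.ml.TrainerFactory;
  import opennlp.tools.util.TrainingParameters;

  public class QuietTrainingSketch {
    public static void main(String[] args) throws Exception {
      TrainingParameters params = TrainingParameters.defaultParams();
      // assumption: this flag is what the data indexers consult after this change
      params.put("PrintMessages", "false");

      EventTrainer trainer = TrainerFactory.getEventTrainer(params, new HashMap<>());
      // trainer.train(eventStream);  // eventStream would be any ObjectStream<Event>
    }
  }
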

[47/50] [abbrv] opennlp git commit: OPENNLP-1012: Write a test case for NameSampleTypeFilter

Posted by jo...@apache.org.
OPENNLP-1012: Write a test case for NameSampleTypeFilter


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ef4c6673
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ef4c6673
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ef4c6673

Branch: refs/heads/parser_regression
Commit: ef4c6673abd37230048b451465d0e216f3560c60
Parents: 8abe90d
Author: Peter Thygesen <pe...@gmail.com>
Authored: Thu Mar 30 17:01:33 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../namefind/NameSampleTypeFilterTest.java      | 102 +++++++++++++++++++
 1 file changed, 102 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/ef4c6673/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTypeFilterTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTypeFilterTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTypeFilterTest.java
new file mode 100644
index 0000000..24ecc9f
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTypeFilterTest.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+
+public class NameSampleTypeFilterTest {
+
+    private static NameSampleTypeFilter filter;
+
+    private static final String text = "<START:organization> NATO <END> Secretary - General " +
+            "<START:person> Anders Fogh Rasmussen <END> made clear that despite an intensifying " +
+            "insurgency and uncertainty over whether <START:location> U . S . <END> President " +
+            "<START:person> Barack Obama <END> will send more troops , <START:location> NATO <END> " +
+            "will remain in <START:location> Afghanistan <END> .";
+
+    private static final String person = "person";
+    private static final String organization = "organization";
+
+    @Test
+    public void testNoFilter() throws IOException {
+
+        final String[] types = new String[] {};
+
+        filter = new NameSampleTypeFilter(types, sampleStream(text));
+
+        NameSample ns = filter.read();
+
+        Assert.assertEquals(0, ns.getNames().length);
+
+    }
+
+    @Test
+    public void testSingleFilter() throws IOException {
+
+        final String[] types = new String[] {organization};
+
+        filter = new NameSampleTypeFilter(types, sampleStream(text));
+
+        NameSample ns = filter.read();
+
+        Assert.assertEquals(1, ns.getNames().length);
+        Assert.assertEquals(organization, ns.getNames()[0].getType());
+
+    }
+
+    @Test
+    public void testMultiFilter() throws IOException {
+
+        final String[] types = new String[] {person, organization};
+
+        filter = new NameSampleTypeFilter(types, sampleStream(text));
+
+        NameSample ns = filter.read();
+
+        Map<String, List<Span>> collect = Arrays.stream(ns.getNames())
+                .collect(Collectors.groupingBy(Span::getType));
+        Assert.assertEquals(2, collect.size());
+        Assert.assertEquals(2, collect.get(person).size());
+        Assert.assertEquals(1, collect.get(organization).size());
+
+    }
+
+    private ObjectStream<NameSample> sampleStream(String sampleText) throws IOException {
+
+        InputStreamFactory in = () -> new ByteArrayInputStream(sampleText.getBytes(StandardCharsets.UTF_8));
+
+        return new NameSampleDataStream(
+                new PlainTextByLineStream(in, StandardCharsets.UTF_8));
+
+    }
+
+}


[31/50] [abbrv] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class

Posted by jo...@apache.org.
OPENNLP-1002 Remove deprecated GIS class


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/7487812e
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/7487812e
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/7487812e

Branch: refs/heads/parser_regression
Commit: 7487812ee3fdac314fc266ad70a5418757021ac3
Parents: dd39d06
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Mar 10 17:13:36 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 .../main/java/opennlp/tools/ml/maxent/GIS.java  | 303 -------------------
 1 file changed, 303 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/7487812e/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
deleted file mode 100644
index 97c214d..0000000
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.ml.maxent;
-
-import java.io.IOException;
-
-import opennlp.tools.ml.AbstractEventTrainer;
-import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.ml.model.Prior;
-import opennlp.tools.ml.model.UniformPrior;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * A Factory class which uses instances of GISTrainer to create and train
- * GISModels.
- * @deprecated use {@link GISTrainer}
- */
-@Deprecated
-public class GIS extends AbstractEventTrainer {
-
-  public static final String MAXENT_VALUE = "MAXENT";
-
-  /**
-   * Set this to false if you don't want messages about the progress of model
-   * training displayed. Alternately, you can use the overloaded version of
-   * trainModel() to conditionally enable progress messages.
-   */
-  public static boolean PRINT_MESSAGES = true;
-
-  /**
-   * If we are using smoothing, this is used as the "number" of times we want
-   * the trainer to imagine that it saw a feature that it actually didn't see.
-   * Defaulted to 0.1.
-   */
-  private static final double SMOOTHING_OBSERVATION = 0.1;
-
-  private static final String SMOOTHING_PARAM = "smoothing";
-  private static final boolean SMOOTHING_DEFAULT = false;
-
-  public GIS() {
-  }
-
-  public GIS(TrainingParameters parameters) {
-    super(parameters);
-  }
-  
-  public boolean isValid() {
-
-    if (!super.isValid()) {
-      return false;
-    }
-
-    String algorithmName = getAlgorithm();
-
-    return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
-  }
-
-  public boolean isSortAndMerge() {
-    return true;
-  }
-
-  public AbstractModel doTrain(DataIndexer indexer) throws IOException {
-    int iterations = getIterations();
-
-    AbstractModel model;
-
-    boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
-    boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
-    int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
-
-    model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
-
-    return model;
-  }
-
-  // << members related to AbstractEventTrainer
-
-  /**
-   * Train a model using the GIS algorithm, assuming 100 iterations and no
-   * cutoff.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
-    return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm, assuming 100 iterations and no
-   * cutoff.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
-      throws IOException {
-    return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff) throws IOException {
-    return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
-      throws IOException {
-    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
-    trainer.setSmoothing(smoothing);
-    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
-    return trainer.trainModel(eventStream, iterations, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @param sigma
-   *          The standard deviation for the gaussian smoother.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff, double sigma) throws IOException {
-    GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
-    if (sigma > 0) {
-      trainer.setGaussianSigma(sigma);
-    }
-    return trainer.trainModel(eventStream, iterations, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
-    return trainModel(iterations, indexer, true, smoothing, null, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer) {
-    return trainModel(iterations, indexer, true, false, null, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm with the specified number of
-   * iterations, data indexer, and prior.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-      Prior modelPrior, int cutoff) {
-    return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-                                    boolean printMessagesWhileTraining, boolean smoothing,
-                                    Prior modelPrior) {
-    return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-                                    boolean printMessagesWhileTraining, boolean smoothing,
-                                    Prior modelPrior, int threads) {
-    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
-    trainer.setSmoothing(smoothing);
-    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
-    if (modelPrior == null) {
-      modelPrior = new UniformPrior();
-    }
-    return trainer.trainModel(iterations, indexer, modelPrior, threads);
-  }
-}
-
-
-
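
The deleted GIS class above was only a thin static facade over GISTrainer (see its @deprecated javadoc). Callers of the removed GIS.trainModel(...) overloads can do directly what the facade did internally. A minimal migration sketch, assuming the GISTrainer constructor and trainModel overloads used by the deleted code are accessible from the calling code; the class and method names below are illustrative only (otherwise the TrainerFactory/TrainingParameters route shown elsewhere in this series can be used):

import java.io.IOException;

import opennlp.tools.ml.maxent.GISModel;
import opennlp.tools.ml.maxent.GISTrainer;
import opennlp.tools.ml.model.Event;
import opennlp.tools.util.ObjectStream;

public class GisMigrationSketch {

  // Replacement for the removed GIS.trainModel(eventStream, iterations, cutoff,
  // smoothing, printMessages): reproduce what the deleted facade did internally.
  static GISModel train(ObjectStream<Event> events, int iterations, int cutoff,
                        boolean smoothing, boolean printMessages) throws IOException {
    GISTrainer trainer = new GISTrainer(printMessages);
    trainer.setSmoothing(smoothing);
    trainer.setSmoothingObservation(0.1); // same default the facade hard-coded
    return trainer.trainModel(events, iterations, cutoff);
  }
}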


[05/50] [abbrv] opennlp git commit: OpenNLP-977: Remove deprecated map methods

Posted by jo...@apache.org.
OpenNLP-977: Remove deprecated map methods


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/51cd8091
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/51cd8091
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/51cd8091

Branch: refs/heads/parser_regression
Commit: 51cd80914b9cddb5771232929b1e9326cecc6170
Parents: 212cf14
Author: Daniel Russ <dr...@mail.nih.gov>
Authored: Mon Feb 6 13:39:59 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 .../java/opennlp/tools/chunker/ChunkerME.java   |  6 +-
 .../java/opennlp/tools/cmdline/CmdLineUtil.java |  4 +-
 .../tools/cmdline/parser/ParserTrainerTool.java | 10 +--
 .../cmdline/postag/POSTaggerTrainerTool.java    |  2 +-
 .../sentdetect/SentenceDetectorTrainerTool.java |  2 +-
 .../cmdline/tokenizer/TokenizerTrainerTool.java |  4 +-
 .../tools/doccat/DocumentCategorizerME.java     |  2 +-
 .../opennlp/tools/lemmatizer/LemmatizerME.java  |  8 +-
 .../tools/ml/EventModelSequenceTrainer.java     |  3 +
 .../java/opennlp/tools/ml/EventTrainer.java     |  3 +
 .../java/opennlp/tools/ml/SequenceTrainer.java  |  3 +
 .../java/opennlp/tools/ml/TrainerFactory.java   | 83 +++++++++-----------
 .../opennlp/tools/namefind/NameFinderME.java    |  8 +-
 .../opennlp/tools/parser/chunking/Parser.java   |  4 +-
 .../opennlp/tools/parser/treeinsert/Parser.java |  6 +-
 .../java/opennlp/tools/postag/POSTaggerME.java  |  8 +-
 .../tools/sentdetect/SentenceDetectorME.java    |  2 +-
 .../opennlp/tools/tokenize/TokenizerME.java     |  2 +-
 .../java/opennlp/tools/ml/MockEventTrainer.java |  6 ++
 .../opennlp/tools/ml/MockSequenceTrainer.java   |  6 ++
 .../opennlp/tools/ml/TrainerFactoryTest.java    | 12 +--
 .../tools/ml/maxent/GISIndexingTest.java        |  4 +-
 .../tools/ml/maxent/MaxentPrepAttachTest.java   |  5 +-
 .../ml/maxent/quasinewton/QNPrepAttachTest.java | 11 ++-
 .../ml/naivebayes/NaiveBayesPrepAttachTest.java |  5 +-
 .../ml/perceptron/PerceptronPrepAttachTest.java | 11 ++-
 .../java/opennlp/uima/util/OpennlpUtil.java     |  4 +-
 27 files changed, 114 insertions(+), 110 deletions(-)
----------------------------------------------------------------------
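
The diffs that follow replace the deprecated Map<String, String> trainer setup (the maps obtained via TrainingParameters.getSettings()) with the TrainingParameters object itself, and the parser tool now fetches per-component settings through getParameters("build"), getParameters("check"), and so on. A minimal sketch of the new calling convention, assembled from the test changes in this commit (the class and method names in the sketch are illustrative):

import java.util.HashMap;

import opennlp.tools.ml.AbstractTrainer;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.maxent.GISTrainer;
import opennlp.tools.util.TrainingParameters;

public class TrainingParametersSketch {

  static EventTrainer newMaxentTrainer() {
    // Build a TrainingParameters object instead of a plain Map<String, String>.
    TrainingParameters trainParams = new TrainingParameters();
    trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
    trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(5));

    // The factory now takes TrainingParameters directly, not trainParams.getSettings().
    return TrainerFactory.getEventTrainer(trainParams, new HashMap<>());
  }
}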


http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index a59b5ce..71917fb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -171,7 +171,7 @@ public class ChunkerME implements Chunker {
 
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
-    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
 
 
     MaxentModel chunkerModel = null;
@@ -179,13 +179,13 @@ public class ChunkerME implements Chunker {
 
     if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
       ObjectStream<Event> es = new ChunkerEventStream(in, factory.getContextGenerator());
-      EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams.getSettings(),
+      EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams,
           manifestInfoEntries);
       chunkerModel = trainer.train(es);
     }
     else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
       SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
-          mlParams.getSettings(), manifestInfoEntries);
+          mlParams, manifestInfoEntries);
 
       // TODO: This will probably cause issue, since the feature generator uses the outcomes array
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
index 6855898..7ea2a0b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
@@ -324,11 +324,11 @@ public final class CmdLineUtil {
         throw new TerminateToolException(-1, "Error during parameters loading: " + e.getMessage(), e);
       }
 
-      if (!TrainerFactory.isValid(params.getSettings())) {
+      if (!TrainerFactory.isValid(params)) {
         throw new TerminateToolException(1, "Training parameters file '" + paramFile + "' is invalid!");
       }
 
-      TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params.getSettings());
+      TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params);
 
       if (!supportSequenceTraining
           && trainerType.equals(TrainerFactory.TrainerType.EVENT_MODEL_SEQUENCE_TRAINER)) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
index 60a4664..3a8dd5a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java
@@ -120,23 +120,23 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
 
     if (mlParams != null) {
-      if (!TrainerFactory.isValid(mlParams.getSettings("build"))) {
+      if (!TrainerFactory.isValid(mlParams.getParameters("build"))) {
         throw new TerminateToolException(1, "Build training parameters are invalid!");
       }
 
-      if (!TrainerFactory.isValid(mlParams.getSettings("check"))) {
+      if (!TrainerFactory.isValid(mlParams.getParameters("check"))) {
         throw new TerminateToolException(1, "Check training parameters are invalid!");
       }
 
-      if (!TrainerFactory.isValid(mlParams.getSettings("attach"))) {
+      if (!TrainerFactory.isValid(mlParams.getParameters("attach"))) {
         throw new TerminateToolException(1, "Attach training parameters are invalid!");
       }
 
-      if (!TrainerFactory.isValid(mlParams.getSettings("tagger"))) {
+      if (!TrainerFactory.isValid(mlParams.getParameters("tagger"))) {
         throw new TerminateToolException(1, "Tagger training parameters are invalid!");
       }
 
-      if (!TrainerFactory.isValid(mlParams.getSettings("chunker"))) {
+      if (!TrainerFactory.isValid(mlParams.getParameters("chunker"))) {
         throw new TerminateToolException(1, "Chunker training parameters are invalid!");
       }
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
index 3c7b618..4a78602 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
@@ -56,7 +56,7 @@ public final class POSTaggerTrainerTool
     super.run(format, args);
 
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
-    if (mlParams != null && !TrainerFactory.isValid(mlParams.getSettings())) {
+    if (mlParams != null && !TrainerFactory.isValid(mlParams)) {
       throw new TerminateToolException(1, "Training parameters file '" + params.getParams() +
           "' is invalid!");
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
index 85bb06f..cdd6916 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
@@ -65,7 +65,7 @@ public final class SentenceDetectorTrainerTool
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
 
     if (mlParams != null) {
-      if (!TrainerType.EVENT_MODEL_TRAINER.equals(TrainerFactory.getTrainerType(mlParams.getSettings()))) {
+      if (!TrainerType.EVENT_MODEL_TRAINER.equals(TrainerFactory.getTrainerType(mlParams))) {
         throw new TerminateToolException(1, "Sequence training is not supported!");
       }
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
index bb722d0..bcf37de 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
@@ -63,12 +63,12 @@ public final class TokenizerTrainerTool
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
 
     if (mlParams != null) {
-      if (!TrainerFactory.isValid(mlParams.getSettings())) {
+      if (!TrainerFactory.isValid(mlParams)) {
         throw new TerminateToolException(1, "Training parameters file '" + params.getParams() +
             "' is invalid!");
       }
 
-      if (!TrainerType.EVENT_MODEL_TRAINER.equals(TrainerFactory.getTrainerType(mlParams.getSettings()))) {
+      if (!TrainerType.EVENT_MODEL_TRAINER.equals(TrainerFactory.getTrainerType(mlParams))) {
         throw new TerminateToolException(1, "Sequence training is not supported!");
       }
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index b65ce8b..33151d9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -236,7 +236,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
     EventTrainer trainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings(), manifestInfoEntries);
+        mlParams, manifestInfoEntries);
 
     MaxentModel model = trainer.train(
         new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
index 98a19f5..4855fda 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
@@ -157,26 +157,26 @@ public class LemmatizerME implements Lemmatizer {
 
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
-    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
 
     MaxentModel lemmatizerModel = null;
     SequenceClassificationModel<String> seqLemmatizerModel = null;
     if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
       ObjectStream<Event> es = new LemmaSampleEventStream(samples, contextGenerator);
 
-      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(),
+      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams,
           manifestInfoEntries);
       lemmatizerModel = trainer.train(es);
     }
     else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
       LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator);
       EventModelSequenceTrainer trainer =
-          TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), manifestInfoEntries);
+          TrainerFactory.getEventModelSequenceTrainer(trainParams, manifestInfoEntries);
       lemmatizerModel = trainer.train(ss);
     }
     else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
       SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
-          trainParams.getSettings(), manifestInfoEntries);
+          trainParams, manifestInfoEntries);
 
       // TODO: This will probably cause issue, since the feature generator uses the outcomes array
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/ml/EventModelSequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/EventModelSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/EventModelSequenceTrainer.java
index 6010041..676490c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/EventModelSequenceTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/EventModelSequenceTrainer.java
@@ -22,12 +22,15 @@ import java.util.Map;
 
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceStream;
+import opennlp.tools.util.TrainingParameters;
 
 public interface EventModelSequenceTrainer {
 
   String SEQUENCE_VALUE = "EventModelSequence";
 
+  @Deprecated
   void init(Map<String, String> trainParams, Map<String, String> reportMap);
+  void init(TrainingParameters trainParams, Map<String, String> reportMap);
 
   MaxentModel train(SequenceStream events) throws IOException;
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java
index 998262b..2a98b86 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java
@@ -24,12 +24,15 @@ import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
 
 public interface EventTrainer {
 
   String EVENT_VALUE = "Event";
 
+  @Deprecated
   void init(Map<String, String> trainParams, Map<String, String> reportMap);
+  void init(TrainingParameters trainingParams, Map<String, String> reportMap);
 
   MaxentModel train(ObjectStream<Event> events) throws IOException;
   MaxentModel train(DataIndexer indexer) throws IOException;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/ml/SequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/SequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/SequenceTrainer.java
index 7443527..3eedea4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/SequenceTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/SequenceTrainer.java
@@ -22,12 +22,15 @@ import java.util.Map;
 
 import opennlp.tools.ml.model.SequenceClassificationModel;
 import opennlp.tools.ml.model.SequenceStream;
+import opennlp.tools.util.TrainingParameters;
 
 public interface SequenceTrainer {
 
   String SEQUENCE_VALUE = "Sequence";
 
+  @Deprecated
   void init(Map<String, String> trainParams, Map<String, String> reportMap);
+  void init(TrainingParameters trainParams, Map<String, String> reportMap);
 
   SequenceClassificationModel<String> train(SequenceStream events) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
index 7897cf2..302035b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
@@ -27,6 +27,7 @@ import opennlp.tools.ml.maxent.quasinewton.QNTrainer;
 import opennlp.tools.ml.naivebayes.NaiveBayesTrainer;
 import opennlp.tools.ml.perceptron.PerceptronTrainer;
 import opennlp.tools.ml.perceptron.SimplePerceptronSequenceTrainer;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.ext.ExtensionLoader;
 import opennlp.tools.util.ext.ExtensionNotLoadedException;
 
@@ -59,16 +60,16 @@ public class TrainerFactory {
    * @param trainParams - Map of training parameters
    * @return the trainer type or null if type couldn't be determined.
    */
-  public static TrainerType getTrainerType(Map<String, String> trainParams) {
+  public static TrainerType getTrainerType(TrainingParameters trainParams) {
 
-    String alogrithmValue = trainParams.get(AbstractTrainer.ALGORITHM_PARAM);
+    String algorithmValue = trainParams.getStringParameter(AbstractTrainer.ALGORITHM_PARAM,null);
 
     // Check if it is defaulting to the MAXENT trainer
-    if (alogrithmValue == null) {
+    if (algorithmValue == null) {
       return TrainerType.EVENT_MODEL_TRAINER;
     }
 
-    Class<?> trainerClass = BUILTIN_TRAINERS.get(alogrithmValue);
+    Class<?> trainerClass = BUILTIN_TRAINERS.get(algorithmValue);
 
     if (trainerClass != null) {
 
@@ -86,7 +87,7 @@ public class TrainerFactory {
     // Try to load the different trainers, and return the type on success
 
     try {
-      ExtensionLoader.instantiateExtension(EventTrainer.class, alogrithmValue);
+      ExtensionLoader.instantiateExtension(EventTrainer.class, algorithmValue);
       return TrainerType.EVENT_MODEL_TRAINER;
     }
     catch (ExtensionNotLoadedException ignored) {
@@ -94,7 +95,7 @@ public class TrainerFactory {
     }
 
     try {
-      ExtensionLoader.instantiateExtension(EventModelSequenceTrainer.class, alogrithmValue);
+      ExtensionLoader.instantiateExtension(EventModelSequenceTrainer.class, algorithmValue);
       return TrainerType.EVENT_MODEL_SEQUENCE_TRAINER;
     }
     catch (ExtensionNotLoadedException ignored) {
@@ -102,7 +103,7 @@ public class TrainerFactory {
     }
 
     try {
-      ExtensionLoader.instantiateExtension(SequenceTrainer.class, alogrithmValue);
+      ExtensionLoader.instantiateExtension(SequenceTrainer.class, algorithmValue);
       return TrainerType.SEQUENCE_TRAINER;
     }
     catch (ExtensionNotLoadedException ignored) {
@@ -112,9 +113,9 @@ public class TrainerFactory {
     return null;
   }
 
-  public static SequenceTrainer getSequenceModelTrainer(Map<String, String> trainParams,
+  public static SequenceTrainer getSequenceModelTrainer(TrainingParameters trainParams,
       Map<String, String> reportMap) {
-    String trainerType = trainParams.get(AbstractTrainer.ALGORITHM_PARAM);
+    String trainerType = trainParams.getStringParameter(AbstractTrainer.ALGORITHM_PARAM,null);
 
     if (trainerType != null) {
       if (BUILTIN_TRAINERS.containsKey(trainerType)) {
@@ -134,9 +135,10 @@ public class TrainerFactory {
     }
   }
 
-  public static EventModelSequenceTrainer getEventModelSequenceTrainer(Map<String, String> trainParams,
+  public static EventModelSequenceTrainer getEventModelSequenceTrainer(TrainingParameters trainParams,
       Map<String, String> reportMap) {
-    String trainerType = trainParams.get(AbstractTrainer.ALGORITHM_PARAM);
+    String trainerType = trainParams.getStringParameter(AbstractTrainer.ALGORITHM_PARAM,null);
+
     if (trainerType != null) {
       if (BUILTIN_TRAINERS.containsKey(trainerType)) {
         EventModelSequenceTrainer trainer = TrainerFactory.<EventModelSequenceTrainer>createBuiltinTrainer(
@@ -155,34 +157,30 @@ public class TrainerFactory {
     }
   }
 
-  public static EventTrainer getEventTrainer(Map<String, String> trainParams,
+  public static EventTrainer getEventTrainer(TrainingParameters trainParams,
       Map<String, String> reportMap) {
-    String trainerType = trainParams.get(AbstractTrainer.ALGORITHM_PARAM);
-    if (trainerType == null) {
-      // default to MAXENT
-      AbstractEventTrainer trainer = new GISTrainer();
+
+    // if the trainerType is not defined -- use the GISTrainer.
+    String trainerType = 
+        trainParams.getStringParameter(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
+
+    if (BUILTIN_TRAINERS.containsKey(trainerType)) {
+      EventTrainer trainer = TrainerFactory.<EventTrainer>createBuiltinTrainer(
+          BUILTIN_TRAINERS.get(trainerType));
+      trainer.init(trainParams, reportMap);
+      return trainer;
+    } else {
+      EventTrainer trainer = ExtensionLoader.instantiateExtension(EventTrainer.class, trainerType);
       trainer.init(trainParams, reportMap);
       return trainer;
     }
-    else {
-      if (BUILTIN_TRAINERS.containsKey(trainerType)) {
-        EventTrainer trainer = TrainerFactory.<EventTrainer>createBuiltinTrainer(
-            BUILTIN_TRAINERS.get(trainerType));
-        trainer.init(trainParams, reportMap);
-        return trainer;
-      } else {
-        EventTrainer trainer = ExtensionLoader.instantiateExtension(EventTrainer.class, trainerType);
-        trainer.init(trainParams, reportMap);
-        return trainer;
-      }
-    }
+
   }
 
-  public static boolean isValid(Map<String, String> trainParams) {
+  public static boolean isValid(TrainingParameters trainParams) {
 
     // TODO: Need to validate all parameters correctly ... error prone?!
-
-    String algorithmName = trainParams.get(AbstractTrainer.ALGORITHM_PARAM);
+    String algorithmName = trainParams.getStringParameter(AbstractTrainer.ALGORITHM_PARAM,null);
 
     // If a trainer type can be determined, then the trainer is valid!
     if (algorithmName != null &&
@@ -191,28 +189,17 @@ public class TrainerFactory {
     }
 
     try {
-      String cutoffString = trainParams.get(AbstractTrainer.CUTOFF_PARAM);
-      if (cutoffString != null) {
-        Integer.parseInt(cutoffString);
-      }
-
-      String iterationsString = trainParams.get(AbstractTrainer.ITERATIONS_PARAM);
-      if (iterationsString != null) {
-        Integer.parseInt(iterationsString);
-      }
+      // require that the Cutoff and the number of iterations be an integer.
+      // if they are not set, the default values will be ok.
+      trainParams.getIntParameter(AbstractTrainer.CUTOFF_PARAM, 0);
+      trainParams.getIntParameter(AbstractTrainer.ITERATIONS_PARAM, 0);
     }
     catch (NumberFormatException e) {
       return false;
     }
 
-    String dataIndexer = trainParams.get(AbstractEventTrainer.DATA_INDEXER_PARAM);
-
-    if (dataIndexer != null) {
-      if (!(AbstractEventTrainer.DATA_INDEXER_ONE_PASS_VALUE.equals(dataIndexer)
-          || AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE.equals(dataIndexer))) {
-        return false;
-      }
-    }
+    // no reason to require that the dataIndexer be a 1-pass or 2-pass dataindexer.
+    trainParams.getStringParameter(AbstractEventTrainer.DATA_INDEXER_PARAM, null);
 
     // TODO: Check data indexing ...
     return true;
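
The TrainerFactory hunks above also show the tightened validation: isValid(TrainingParameters) now relies on getIntParameter(...) to reject non-numeric cutoff and iteration values, and getTrainerType(TrainingParameters) resolves the algorithm name against the built-in trainers before falling back to the ExtensionLoader. A short sketch of the validated lookup, mirroring how CmdLineUtil and the trainer tools call it after this commit (the helper class and method names are illustrative):

import opennlp.tools.ml.AbstractTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.TrainerFactory.TrainerType;
import opennlp.tools.ml.perceptron.PerceptronTrainer;
import opennlp.tools.util.TrainingParameters;

public class TrainerLookupSketch {

  static TrainerType resolveTrainerType() {
    TrainingParameters params = new TrainingParameters();
    params.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
    params.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));

    // Reject malformed parameters (e.g. a non-integer cutoff) before training.
    if (!TrainerFactory.isValid(params)) {
      throw new IllegalArgumentException("Training parameters are invalid!");
    }

    // Resolve the trainer category (event, event-model-sequence, or sequence trainer).
    return TrainerFactory.getTrainerType(params);
  }
}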

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index bb3603e..6ce0b83 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -232,13 +232,13 @@ public class NameFinderME implements TokenNameFinder {
 
     SequenceClassificationModel<String> seqModel = null;
 
-    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
 
     if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
       ObjectStream<Event> eventStream = new NameFinderEventStream(samples, type,
               factory.createContextGenerator(), factory.createSequenceCodec());
 
-      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(), manifestInfoEntries);
+      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, manifestInfoEntries);
       nameFinderModel = trainer.train(eventStream);
     } // TODO: Maybe it is not a good idea, that these two don't use the context generator ?!
     // These also don't use the sequence codec ?!
@@ -246,11 +246,11 @@ public class NameFinderME implements TokenNameFinder {
       NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, factory.createContextGenerator());
 
       EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer(
-              trainParams.getSettings(), manifestInfoEntries);
+              trainParams, manifestInfoEntries);
       nameFinderModel = trainer.train(ss);
     } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
       SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
-              trainParams.getSettings(), manifestInfoEntries);
+              trainParams, manifestInfoEntries);
 
       NameSampleSequenceStream ss =
           new NameSampleSequenceStream(samples, factory.createContextGenerator(), false);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
index 394b955..53a8cba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
@@ -284,7 +284,7 @@ public class Parser extends AbstractBottomUpParser {
     ObjectStream<Event> bes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.BUILD, mdict);
     Map<String, String> buildReportMap = new HashMap<>();
     EventTrainer buildTrainer =
-        TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap);
+        TrainerFactory.getEventTrainer(mlParams.getParameters("build"), buildReportMap);
     MaxentModel buildModel = buildTrainer.train(bes);
     mergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");
 
@@ -314,7 +314,7 @@ public class Parser extends AbstractBottomUpParser {
     ObjectStream<Event> kes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.CHECK);
     Map<String, String> checkReportMap = new HashMap<>();
     EventTrainer checkTrainer =
-        TrainerFactory.getEventTrainer(mlParams.getSettings("check"), checkReportMap);
+        TrainerFactory.getEventTrainer(mlParams.getParameters("check"), checkReportMap);
     MaxentModel checkModel = checkTrainer.train(kes);
     mergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
index d3904a9..527bdb6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
@@ -471,7 +471,7 @@ public class Parser extends AbstractBottomUpParser {
     Map<String, String> buildReportMap = new HashMap<>();
 
     EventTrainer buildTrainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings("build"), buildReportMap);
+        mlParams.getParameters("build"), buildReportMap);
     MaxentModel buildModel = buildTrainer.train(bes);
     opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(
         manifestInfoEntries, buildReportMap, "build");
@@ -485,7 +485,7 @@ public class Parser extends AbstractBottomUpParser {
     Map<String, String> checkReportMap = new HashMap<>();
 
     EventTrainer checkTrainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings("check"), checkReportMap);
+        mlParams.getParameters("check"), checkReportMap);
     MaxentModel checkModel = checkTrainer.train(kes);
     opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(
         manifestInfoEntries, checkReportMap, "check");
@@ -498,7 +498,7 @@ public class Parser extends AbstractBottomUpParser {
         ParserEventTypeEnum.ATTACH);
     Map<String, String> attachReportMap = new HashMap<>();
     EventTrainer attachTrainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings("attach"), attachReportMap);
+        mlParams.getParameters("attach"), attachReportMap);
     MaxentModel attachModel = attachTrainer.train(attachEvents);
     opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(
         manifestInfoEntries, attachReportMap, "attach");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
index a0ffefc..5415ba7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
@@ -236,26 +236,26 @@ public class POSTaggerME implements POSTagger {
 
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
-    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
 
     MaxentModel posModel = null;
     SequenceClassificationModel<String> seqPosModel = null;
     if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
       ObjectStream<Event> es = new POSSampleEventStream(samples, contextGenerator);
 
-      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(),
+      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams,
           manifestInfoEntries);
       posModel = trainer.train(es);
     }
     else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
       POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator);
       EventModelSequenceTrainer trainer =
-          TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), manifestInfoEntries);
+          TrainerFactory.getEventModelSequenceTrainer(trainParams, manifestInfoEntries);
       posModel = trainer.train(ss);
     }
     else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
       SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
-          trainParams.getSettings(), manifestInfoEntries);
+          trainParams, manifestInfoEntries);
 
       // TODO: This will probably cause issue, since the feature generator uses the outcomes array
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index 282147d..2f3fd6c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -320,7 +320,7 @@ public class SentenceDetectorME implements SentenceDetector {
     ObjectStream<Event> eventStream = new SDEventStream(samples,
         sdFactory.getSDContextGenerator(), sdFactory.getEndOfSentenceScanner());
 
-    EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
+    EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries);
 
     MaxentModel sentModel = trainer.train(eventStream);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index a96e7cf..6d54308 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -242,7 +242,7 @@ public class TokenizerME extends AbstractTokenizer {
         factory.getAlphaNumericPattern(), factory.getContextGenerator());
 
     EventTrainer trainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings(), manifestInfoEntries);
+        mlParams, manifestInfoEntries);
 
     MaxentModel maxentModel = trainer.train(eventStream);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/MockEventTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/MockEventTrainer.java b/opennlp-tools/src/test/java/opennlp/tools/ml/MockEventTrainer.java
index 844ef1c..0de7c96 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/MockEventTrainer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/MockEventTrainer.java
@@ -24,6 +24,7 @@ import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
 
 public class MockEventTrainer implements EventTrainer {
 
@@ -40,4 +41,9 @@ public class MockEventTrainer implements EventTrainer {
   public void init(Map<String, String> trainParams,
       Map<String, String> reportMap) {
   }
+
+  @Override
+  public void init(TrainingParameters trainingParams,
+      Map<String, String> reportMap) {
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java b/opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java
index a323dfe..19a8aaa 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/MockSequenceTrainer.java
@@ -22,6 +22,7 @@ import java.util.Map;
 
 import opennlp.tools.ml.model.AbstractModel;
 import opennlp.tools.ml.model.SequenceStream;
+import opennlp.tools.util.TrainingParameters;
 
 public class MockSequenceTrainer implements EventModelSequenceTrainer {
 
@@ -34,4 +35,9 @@ public class MockSequenceTrainer implements EventModelSequenceTrainer {
       Map<String, String> reportMap) {
   }
 
+  @Override
+  public void init(TrainingParameters trainParams,
+      Map<String, String> reportMap) {
+  }
+  
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java
index 092742c..f7ac117 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/TrainerFactoryTest.java
@@ -40,25 +40,25 @@ public class TrainerFactoryTest {
 
   @Test
   public void testBuiltInValid() {
-    Assert.assertTrue(TrainerFactory.isValid(mlParams.getSettings()));
+    Assert.assertTrue(TrainerFactory.isValid(mlParams));
   }
 
   @Test
   public void testSequenceTrainerValid() {
     mlParams.put(TrainingParameters.ALGORITHM_PARAM, MockSequenceTrainer.class.getCanonicalName());
-    Assert.assertTrue(TrainerFactory.isValid(mlParams.getSettings()));
+    Assert.assertTrue(TrainerFactory.isValid(mlParams));
   }
 
   @Test
   public void testEventTrainerValid() {
     mlParams.put(TrainingParameters.ALGORITHM_PARAM, MockEventTrainer.class.getCanonicalName());
-    Assert.assertTrue(TrainerFactory.isValid(mlParams.getSettings()));
+    Assert.assertTrue(TrainerFactory.isValid(mlParams));
   }
 
   @Test
   public void testInvalidTrainer() {
     mlParams.put(TrainingParameters.ALGORITHM_PARAM, "xyz");
-    Assert.assertFalse(TrainerFactory.isValid(mlParams.getSettings()));
+    Assert.assertFalse(TrainerFactory.isValid(mlParams));
   }
 
   @Test
@@ -66,7 +66,7 @@ public class TrainerFactoryTest {
     mlParams.put(AbstractTrainer.ALGORITHM_PARAM,
         SimplePerceptronSequenceTrainer.PERCEPTRON_SEQUENCE_VALUE);
 
-    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
 
     Assert.assertTrue(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
   }
@@ -74,7 +74,7 @@ public class TrainerFactoryTest {
   @Test
   public void testIsSequenceTrainerFalse() {
     mlParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
-    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams.getSettings());
+    TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
     Assert.assertFalse(TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType));
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
index 5a98f73..6922603 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
@@ -119,7 +119,7 @@ public class GISIndexingTest {
 
     // guarantee that you have a GIS trainer...
     EventTrainer trainer =
-        TrainerFactory.getEventTrainer(parameters.getSettings(), new HashMap<>());
+        TrainerFactory.getEventTrainer(parameters, new HashMap<>());
     Assert.assertEquals("opennlp.tools.ml.maxent.GISTrainer", trainer.getClass().getName());
     AbstractEventTrainer aeTrainer = (AbstractEventTrainer)trainer;
     // guarantee that you have a OnePassDataIndexer ...
@@ -137,7 +137,7 @@ public class GISIndexingTest {
     parameters.put(AbstractEventTrainer.DATA_INDEXER_PARAM, AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
     parameters.put(AbstractEventTrainer.CUTOFF_PARAM, "2");
     
-    trainer = TrainerFactory.getEventTrainer(parameters.getSettings(), new HashMap<>());
+    trainer = TrainerFactory.getEventTrainer(parameters, new HashMap<>());
     Assert.assertEquals("opennlp.tools.ml.maxent.quasinewton.QNTrainer", trainer.getClass().getName());
     aeTrainer = (AbstractEventTrainer)trainer;
     di = aeTrainer.getDataIndexer(eventStream);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MaxentPrepAttachTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MaxentPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MaxentPrepAttachTest.java
index 74b13de..36e8926 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MaxentPrepAttachTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MaxentPrepAttachTest.java
@@ -19,7 +19,6 @@ package opennlp.tools.ml.maxent;
 
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.Map;
 
 import org.junit.Before;
 import org.junit.Test;
@@ -75,7 +74,7 @@ public class MaxentPrepAttachTest {
   @Test
   public void testMaxentOnPrepAttachDataWithParams() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
     trainParams.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
         AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
@@ -90,7 +89,7 @@ public class MaxentPrepAttachTest {
   @Test
   public void testMaxentOnPrepAttachDataWithParamsDefault() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
 
     EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams, null);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
index c4f5cea..c01aa76 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNPrepAttachTest.java
@@ -19,7 +19,6 @@ package opennlp.tools.ml.maxent.quasinewton;
 
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.Map;
 
 import org.junit.Test;
 
@@ -53,7 +52,7 @@ public class QNPrepAttachTest {
   @Test
   public void testQNOnPrepAttachDataWithParamsDefault() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
 
     MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null)
@@ -65,7 +64,7 @@ public class QNPrepAttachTest {
   @Test
   public void testQNOnPrepAttachDataWithElasticNetParams() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
     trainParams.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
         AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
@@ -82,7 +81,7 @@ public class QNPrepAttachTest {
   @Test
   public void testQNOnPrepAttachDataWithL1Params() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
     trainParams.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
         AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
@@ -99,7 +98,7 @@ public class QNPrepAttachTest {
   @Test
   public void testQNOnPrepAttachDataWithL2Params() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
     trainParams.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
         AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
@@ -116,7 +115,7 @@ public class QNPrepAttachTest {
   @Test
   public void testQNOnPrepAttachDataInParallel() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
     trainParams.put(QNTrainer.THREADS_PARAM, Integer.toString(2));
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesPrepAttachTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesPrepAttachTest.java
index 019e553..e994ba1 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesPrepAttachTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesPrepAttachTest.java
@@ -19,7 +19,6 @@ package opennlp.tools.ml.naivebayes;
 
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.Map;
 
 import org.junit.Assert;
 import org.junit.Before;
@@ -61,7 +60,7 @@ public class NaiveBayesPrepAttachTest {
 
   @Test
   public void testNaiveBayesOnPrepAttachDataUsingTrainUtil() throws IOException {
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
 
@@ -73,7 +72,7 @@ public class NaiveBayesPrepAttachTest {
 
   @Test
   public void testNaiveBayesOnPrepAttachDataUsingTrainUtilWithCutoff5() throws IOException {
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(5));
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
index 0e2c140..d4d70ca 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/perceptron/PerceptronPrepAttachTest.java
@@ -23,7 +23,6 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.Map;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -58,7 +57,7 @@ public class PerceptronPrepAttachTest {
   @Test
   public void testPerceptronOnPrepAttachDataWithSkippedAveraging() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
     trainParams.put("UseSkippedAveraging", Boolean.toString(true));
@@ -71,7 +70,7 @@ public class PerceptronPrepAttachTest {
   @Test
   public void testPerceptronOnPrepAttachDataWithTolerance() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
     trainParams.put(AbstractTrainer.ITERATIONS_PARAM, Integer.toString(500));
@@ -85,7 +84,7 @@ public class PerceptronPrepAttachTest {
   @Test
   public void testPerceptronOnPrepAttachDataWithStepSizeDecrease() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
     trainParams.put(AbstractTrainer.ITERATIONS_PARAM, Integer.toString(500));
@@ -99,7 +98,7 @@ public class PerceptronPrepAttachTest {
   @Test
   public void testModelSerialization() throws IOException {
 
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
     trainParams.put("UseSkippedAveraging", Boolean.toString(true));
@@ -123,7 +122,7 @@ public class PerceptronPrepAttachTest {
 
   @Test
   public void testModelEquals() throws IOException {
-    Map<String, String> trainParams = new HashMap<>();
+    TrainingParameters trainParams = new TrainingParameters();
     trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
     trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));
     trainParams.put("UseSkippedAveraging", Boolean.toString(true));
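
The migrated tests above all follow the same pattern; a minimal sketch of the new style, assuming an ObjectStream<Event> named events supplying the training events (a stand-in for the stream the tests build themselves), is:

  import opennlp.tools.ml.AbstractTrainer;
  import opennlp.tools.ml.TrainerFactory;
  import opennlp.tools.ml.model.MaxentModel;
  import opennlp.tools.ml.perceptron.PerceptronTrainer;
  import opennlp.tools.util.TrainingParameters;

  // Training settings now go into a TrainingParameters object
  // instead of a raw Map<String, String>.
  TrainingParameters trainParams = new TrainingParameters();
  trainParams.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
  trainParams.put(AbstractTrainer.CUTOFF_PARAM, Integer.toString(1));

  // TrainerFactory accepts the TrainingParameters directly; the second
  // argument is the optional report map (null here, as in the tests).
  MaxentModel model = TrainerFactory.getEventTrainer(trainParams, null).train(events);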

http://git-wip-us.apache.org/repos/asf/opennlp/blob/51cd8091/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
index 23d0b3a..338dfec 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
@@ -82,11 +82,11 @@ final public class OpennlpUtil {
         throw new ResourceInitializationException(e);
       }
 
-      if (!TrainerFactory.isValid(params.getSettings())) {
+      if (!TrainerFactory.isValid(params)) {
         throw new ResourceInitializationException(new Exception("Training parameters file is invalid!"));
       }
 
-      TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params.getSettings());
+      TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params);
       if (!isSequenceTrainingAllowed && TrainerFactory.TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
         throw new ResourceInitializationException(new Exception("Sequence training is not supported!"));
       }
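
The validation step above now takes the TrainingParameters object itself; a rough sketch of loading and checking a parameters file in that style (paramsFile is a placeholder, and the InputStream constructor of TrainingParameters is assumed, as used elsewhere in opennlp-tools) looks like:

  TrainingParameters params;
  try (InputStream in = new FileInputStream(paramsFile)) {
    params = new TrainingParameters(in);
  }

  // No more params.getSettings(): the factory methods work on
  // TrainingParameters directly.
  if (!TrainerFactory.isValid(params)) {
    throw new IllegalArgumentException("Training parameters file is invalid!");
  }
  TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params);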


[44/50] [abbrv] opennlp git commit: OPENNLP-1011: Fix pos eval tests

Posted by jo...@apache.org.
OPENNLP-1011: Fix pos eval tests


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a1ced404
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a1ced404
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a1ced404

Branch: refs/heads/parser_regression
Commit: a1ced404c4225225e59b3a5957d4579752b9a195
Parents: 61edfe5
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Mar 21 23:29:16 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/postag/POSTaggerFactory.java  | 20 ++++++++++++++------
 .../opennlp/tools/eval/ConllXPosTaggerEval.java | 16 ++++++++--------
 .../tools/eval/OntoNotes4PosTaggerEval.java     |  2 +-
 3 files changed, 23 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a1ced404/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index 37143c9..c4164f4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -27,6 +27,7 @@ import java.io.OutputStream;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 
 import opennlp.tools.dictionary.Dictionary;
@@ -186,10 +187,9 @@ public class POSTaggerFactory extends BaseToolFactory {
   public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
     Map<String, ArtifactSerializer> serializers = super.createArtifactSerializersMap();
 
+
     // NOTE: This is only needed for old models and this if can be removed if support is dropped
-    if (Version.currentVersion().getMinor() < 8) {
-      POSDictionarySerializer.register(serializers);
-    }
+    POSDictionarySerializer.register(serializers);
 
     return serializers;
   }
@@ -269,11 +269,19 @@ public class POSTaggerFactory extends BaseToolFactory {
   }
 
   public POSContextGenerator getPOSContextGenerator(int cacheSize) {
-    if (Version.currentVersion().getMinor() >= 8) {
-      return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators());
+
+    if (artifactProvider != null) {
+      Properties manifest = (Properties) artifactProvider.getArtifact("manifest.properties");
+
+      String version = manifest.getProperty("OpenNLP-Version");
+
+      if (Version.parse(version).getMinor() < 8) {
+        return new DefaultPOSContextGenerator(cacheSize, getDictionary());
+      }
     }
+    
+    return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators());
 
-    return new DefaultPOSContextGenerator(cacheSize, getDictionary());
   }
 
   public SequenceValidator<String> getSequenceValidator() {
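
The dispatch above keys off the version recorded in the model's manifest rather than the running library version; a small illustration of what the check sees (the "1.7.2" string is just an example value of the OpenNLP-Version manifest property):

  Version v = Version.parse("1.7.2");
  // v.getMinor() == 7, so a 1.7.x model keeps the DefaultPOSContextGenerator,
  // while models written by 1.8.0 and later get the ConfigurablePOSContextGenerator.
  boolean legacyModel = v.getMinor() < 8;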

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a1ced404/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
index 6245961..600e599 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java
@@ -84,7 +84,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9512987012987013d);
+        "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9504442925495558d);
   }
 
   @Test
@@ -95,7 +95,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9456596035543404d);
+        "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9564251537935748d);
   }
 
   @Test
@@ -106,7 +106,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9174574753804834d);
+        "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9213965980304387d);
   }
 
   @Test
@@ -117,7 +117,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9025962399283796d);
+        "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9282005371530886d);
   }
 
   @Test
@@ -128,7 +128,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9659110277825124d);
+        "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9671041418101244d);
   }
 
   @Test
@@ -139,7 +139,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9676154763933867d);
+        "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9662519175046872d);
   }
 
   @Test
@@ -150,7 +150,7 @@ public class ConllXPosTaggerEval {
         "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9275106082036775d);
+        "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9248585572842999d);
   }
 
   @Test
@@ -161,6 +161,6 @@ public class ConllXPosTaggerEval {
         "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params);
 
     eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(),
-        "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9245049504950495d);
+        "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9322842998585573d);
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/a1ced404/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
index 5ce1fba..31b42d1 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java
@@ -62,6 +62,6 @@ public class OntoNotes4PosTaggerEval {
 
   @Test
   public void evalEnglishMaxentTagger() throws IOException {
-    crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9707977252663043d);
+    crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9699561275750962d);
   }
 }


[42/50] [abbrv] opennlp git commit: OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

Posted by jo...@apache.org.
OPENNLP-1005: Implement areOutcomesCompatible for BilouCodec

This issue closes #144


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/36de0131
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/36de0131
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/36de0131

Branch: refs/heads/parser_regression
Commit: 36de0131947d98e8246ef0fcf8eaf56d546d27b4
Parents: bc99b72
Author: Peter Thygesen <pe...@gmail.com>
Authored: Thu Mar 16 11:19:58 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 .../java/opennlp/tools/namefind/BilouCodec.java |  61 +++
 .../opennlp/tools/namefind/BilouCodecTest.java  | 375 +++++++++++++++++++
 2 files changed, 436 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
index 7e8508a..50cc4bf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouCodec.java
@@ -19,7 +19,9 @@ package opennlp.tools.namefind;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import opennlp.tools.util.SequenceCodec;
 import opennlp.tools.util.SequenceValidator;
@@ -111,8 +113,67 @@ public class BilouCodec implements SequenceCodec<String> {
     return new BilouNameFinderSequenceValidator();
   }
 
+  /**
+   * B requires CL or L
+   * C requires BL
+   * L requires B
+   * O requires any valid combo/unit
+   * U requires none
+   *
+   * @param outcomes all possible model outcomes
+   *
+   * @return true, if model outcomes are compatible
+   */
   @Override
   public boolean areOutcomesCompatible(String[] outcomes) {
+    Set<String> start = new HashSet<>();
+    Set<String> cont = new HashSet<>();
+    Set<String> last = new HashSet<>();
+    Set<String> unit = new HashSet<>();
+
+    for (int i = 0; i < outcomes.length; i++) {
+      String outcome = outcomes[i];
+      if (outcome.endsWith(BilouCodec.START)) {
+        start.add(outcome.substring(0, outcome.length()
+            - BilouCodec.START.length()));
+      } else if (outcome.endsWith(BilouCodec.CONTINUE)) {
+        cont.add(outcome.substring(0, outcome.length()
+            - BilouCodec.CONTINUE.length()));
+      } else if (outcome.endsWith(BilouCodec.LAST)) {
+        last.add(outcome.substring(0, outcome.length()
+            - BilouCodec.LAST.length()));
+      } else if (outcome.endsWith(BilouCodec.UNIT)) {
+        unit.add(outcome.substring(0, outcome.length()
+            - BilouCodec.UNIT.length()));
+      } else if (!outcome.equals(BilouCodec.OTHER)) {
+        return false;
+      }
+    }
+
+    if (start.size() == 0 && unit.size() == 0) {
+      return false;
+    } else {
+      // Start, must have matching Last
+      for (String startPrefix : start) {
+        if (!last.contains(startPrefix)) {
+          return false;
+        }
+      }
+      // Cont, must have matching Start and Last
+      for (String contPrefix : cont) {
+        if (!start.contains(contPrefix) && !last.contains(contPrefix)) {
+          return false;
+        }
+      }
+      // Last, must have matching Start
+      for (String lastPrefix : last) {
+        if (!start.contains(lastPrefix)) {
+          return false;
+        }
+      }
+
+    }
+
     return true;
   }
 }
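
In plain terms the check accepts an outcome set only if every start outcome has a matching last outcome for the same type (and continues are backed by both); a minimal illustration, assuming the codec's suffix constants are accessible as in the test class below ("per" is just a placeholder type name):

  BilouCodec codec = new BilouCodec();

  // start and last present for the same type -> compatible
  codec.areOutcomesCompatible(new String[] {
      "per-" + BilouCodec.START, "per-" + BilouCodec.LAST, BilouCodec.OTHER});   // true

  // dangling start without a matching last -> not compatible
  codec.areOutcomesCompatible(new String[] {
      "per-" + BilouCodec.START, BilouCodec.OTHER});                             // false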

http://git-wip-us.apache.org/repos/asf/opennlp/blob/36de0131/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
index 96d939f..353c7e4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
@@ -206,4 +206,379 @@ public class BilouCodecTest {
     Assert.assertArrayEquals(expected, actual);
   }
 
+
+  @Test
+  public void testCompatibilityEmpty() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  /**
+   * Singles and singles in combination with other valid type (unit/start+last)
+   */
+
+  /**
+   * B-Start => Fail
+   * A-Unit, B-Start => Fail
+   * A-Start, A-Last, B-Start => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesStart() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START}));
+  }
+
+  /**
+   * B-Continue => Fail
+   * A-Unit, B-Continue => Fail
+   * A-Start, A-Last, B-Continue => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE}));
+  }
+
+  /**
+   * B-Last => Fail
+   * A-Unit, B-Last => Fail
+   * A-Start, A-Last, B-Last => Fail
+   */
+  @Test
+  public void testCompatibilitySinglesLast() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST}));
+  }
+
+  /**
+   * Other => Fail
+   * A-Unit, Other => Pass
+   * A-Start, A-Last, Other => Pass
+   */
+  @Test
+  public void testCompatibilitySinglesOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER}));
+  }
+
+  /**
+   * B-Unit => Pass
+   * A-Unit, B-Unit => Pass
+   * A-Start, A-Last, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilitySinglesUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_UNIT}));
+  }
+
+  /**
+   * Doubles and doubles in combination with other valid type (unit/start+last)
+   *
+   * B-Start, B-Continue => Fail
+   * A-Unit, B-Start, B-Continue => Fail
+   * A-Start, A-Last, B-Start, B-Continue => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_CONTINUE}));
+  }
+
+  /**
+   * B-Start, B-Last => Pass
+   * A-Unit, B-Start, B-Last => Pass
+   * A-Start, A-Last, B-Start, B-Last => Pass
+   */
+  @Test
+  public void testCompatibilityStartLast() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {B_START, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_LAST}));
+  }
+
+  /**
+   * B-Start, Other => Fail
+   * A-Unit, B-Start, Other => Fail
+   * A-Start, A-Last, B-Start, Other => Fail
+   */
+  @Test
+  public void testCompatibilityStartOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Unit => Fail
+   * A-Unit, B-Start, B-Unit => Fail
+   * A-Start, A-Last, B-Start, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityStartUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_START, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_START, B_UNIT}));
+  }
+
+  /**
+   * B-Continue, B-Last => Fail
+   * A-Unit, B-Continue, B-Last => Fail
+   * A-Start, A-Last, B-Continue, B-Last => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLast() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_LAST}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_LAST}));
+  }
+
+  /**
+   * B-Continue, Other => Fail
+   * A-Unit, B-Continue, Other => Fail
+   * A-Start, A-Last, B-Continue, Other => Fail
+   */
+  @Test
+  public void testCompatibilityContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, OTHER}));
+  }
+
+  /**
+   * B-Continue, B-Unit => Fail
+   * A-Unit, B-Continue, B-Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_CONTINUE, B_UNIT}));
+  }
+
+  /**
+   * B-Last, Other => Fail
+   * A-Unit, B-Last, Other => Fail
+   * A-Start, A-Last, B-Last, Other => Fail
+   */
+  @Test
+  public void testCompatibilityLastOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Last, B-Unit => Fail
+   * A-Unit, B-Last, B-Unit => Fail
+   * A-Start, A-Last, B-Last, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityLastUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_UNIT, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, B_LAST, B_UNIT}));
+  }
+
+  /**
+   * Other, B-Unit => Pass
+   * A-Unit, Other, B-Unit => Pass
+   * A-Start, A-Last, Other, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityOtherUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_UNIT, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Triples and triples in combination with other valid type (unit/start+last)
+   *
+   * B-Start, B-Continue, B-Last => Pass
+   * A-Unit, B-Start, B-Continue, B-Last => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLast() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST}));
+  }
+
+  /**
+   * B-Start, B-Continue, Other => Fail
+   * A-Unit, B-Start, B-Continue, Other => Fail
+   * A-Start, A-Last, B-Start, B-Continue, Other => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Continue, B-Unit => Fail
+   * A-Unit, B-Start, B-Continue, B-Unit => Fail
+   * A-Start, A-Last, B-Start, B-Continue, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityStartContinueUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_UNIT}));
+  }
+
+  /**
+   * B-Continue, B-Last, Other => Fail
+   * A-Unit, B-Continue, B-Last, Other => Fail
+   * A-Start, A-Last, B-Continue, B-Last, Other => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Continue, B-Last, B-Unit => Fail
+   * A-Unit, B-Continue, B-Last, B_Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Last, B_Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, B_UNIT}));
+  }
+
+  /**
+   * B-Last, Other, B-Unit => Fail
+   * A-Unit, B-Last, Other, B-Unit => Fail
+   * A-Start, A-Last, B-Last, Other, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityLastOtherUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Quadruples and quadruple in combination of unit/start+last
+   *
+   * B-Start, B-Continue, B-Last, Other => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, Other => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, Other => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLastOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER}));
+  }
+
+  /**
+   * B-Start, B-Continue, B-Last, B-Unit => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, B-Unit => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityStartContinueLastUnit() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, B_UNIT}));
+  }
+
+
+  /**
+   * B-Continue, B-Last, Other, B-Unit => Fail
+   * A-Unit, B-Continue, B-Last, Other, B-Unit => Fail
+   * A-Start, A-Last, B-Continue, B-Last, Other, B-Unit => Fail
+   */
+  @Test
+  public void testCompatibilityContinueLastOtherUnit() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Quintuple
+   *
+   * B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   * A-Unit, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   * A-Start, A-Last, B-Start, B-Continue, B-Last, Other, B-Unit => Pass
+   */
+  @Test
+  public void testCompatibilityUnitOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_UNIT, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_LAST, B_START, B_CONTINUE, B_LAST, OTHER, B_UNIT}));
+  }
+
+  /**
+   * Multiclass
+   */
+  @Test
+  public void testCompatibilityMultiClass() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {B_UNIT, A_CONTINUE, A_LAST, A_UNIT,
+            B_START, B_LAST, A_START, C_UNIT, OTHER}));
+  }
+
+  /**
+   * Bad combinations
+   */
+  @Test
+  public void testCompatibilityBadTag() {
+    Assert.assertFalse(codec.areOutcomesCompatible(
+        new String[] {A_START, A_CONTINUE, OTHER, "BAD"}));
+  }
+
+  @Test
+  public void testCompatibilityWrongClass() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, B_LAST, OTHER}));
+  }
+
+
+
 }


[49/50] [abbrv] opennlp git commit: OPENNLP-1016: Add more tests for StringList

Posted by jo...@apache.org.
OPENNLP-1016: Add more tests for StringList


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5eb8ff8d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5eb8ff8d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5eb8ff8d

Branch: refs/heads/parser_regression
Commit: 5eb8ff8deb442ece89fad5f14368c6cbe10772d4
Parents: a1ced40
Author: koji <ko...@rondhuit.com>
Authored: Mon Apr 10 14:16:34 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../test/java/opennlp/tools/util/StringListTest.java | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5eb8ff8d/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
index a57a2ae..d8c7ca2 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
@@ -28,6 +28,16 @@ import org.junit.Test;
 public class StringListTest {
 
   /**
+   * Tests {@link StringList} which uses {@link String#intern}.
+   */
+  @Test
+  public void testIntern() {
+    StringList l1 = new StringList("a");
+    StringList l2 = new StringList("a", "b");
+    Assert.assertTrue(l1.getToken(0) == l2.getToken(0));
+  }
+
+  /**
    * Tests {@link StringList#getToken(int)}.
    */
   @Test
@@ -90,6 +100,8 @@ public class StringListTest {
   public void testHashCode() {
     Assert.assertEquals(new StringList("a", "b").hashCode(),
         new StringList("a", "b").hashCode());
+    Assert.assertNotEquals(new StringList("a", "b").hashCode(),
+        new StringList("a", "c").hashCode());
   }
 
   /**
@@ -97,6 +109,7 @@ public class StringListTest {
    */
   @Test
   public void testToString() {
-    new StringList("a", "b").toString();
+    Assert.assertEquals("[a]", new StringList("a").toString());
+    Assert.assertEquals("[a,b]", new StringList("a", "b").toString());
   }
 }


[08/50] [abbrv] opennlp git commit: [maven-release-plugin] prepare release opennlp-1.7.2

Posted by jo...@apache.org.
[maven-release-plugin] prepare release opennlp-1.7.2


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4b8ebad4
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4b8ebad4
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4b8ebad4

Branch: refs/heads/parser_regression
Commit: 4b8ebad42877bca23a6cda78360d59d71f263139
Parents: 8b47930
Author: smarthi <sm...@apache.org>
Authored: Wed Feb 1 11:03:26 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml            | 2 +-
 opennlp-docs/pom.xml             | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml            | 2 +-
 opennlp-uima/pom.xml             | 2 +-
 pom.xml                          | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 4bf95cf..53517ca 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2-SNAPSHOT</version>
+		<version>1.7.2</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index 613bd80..ae86021 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2-SNAPSHOT</version>
+		<version>1.7.2</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-docs/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index 7b916c0..b765d91 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   <parent>
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.2-SNAPSHOT</version>
+	<version>1.7.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-morfologik-addon/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index d62a70a..50844f2 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2-SNAPSHOT</version>
+		<version>1.7.2</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 9441ebb..6cdb688 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.2-SNAPSHOT</version>
+    <version>1.7.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/opennlp-uima/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 1e99c3d..39f1040 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@
 	<parent>
 	    <groupId>org.apache.opennlp</groupId>
 	    <artifactId>opennlp</artifactId>
-	    <version>1.7.2-SNAPSHOT</version>
+	    <version>1.7.2</version>
 	    <relativePath>../pom.xml</relativePath>
     </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4b8ebad4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7081f25..bbb48c8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.2-SNAPSHOT</version>
+	<version>1.7.2</version>
 	<packaging>pom</packaging>
 
 	<name>Apache OpenNLP Reactor</name>
@@ -40,7 +40,7 @@
 		<connection>scm:git:git@github.com:apache/opennlp.git</connection>
 		<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/opennlp.git</developerConnection>
 		<url>https://git-wip-us.apache.org/repos/asf?p=opennlp.git</url>
-		<tag>HEAD</tag>
+		<tag>opennlp-1.7.2</tag>
 	</scm>
 
 	<mailingLists>


[23/50] [abbrv] opennlp git commit: OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133

Posted by jo...@apache.org.
OPENNLP-994: Remove deprecated methods from the Document Categorizer, this closes apache/opennlp#133


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/76609f5c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/76609f5c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/76609f5c

Branch: refs/heads/parser_regression
Commit: 76609f5c105bcfc3abab6e2d19de283d945c96a6
Parents: 81acc6e
Author: smarthi <sm...@apache.org>
Authored: Mon Feb 27 17:23:40 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 .../doccat/DoccatCrossValidatorTool.java        |   7 +-
 .../tools/cmdline/doccat/DoccatTool.java        |  11 +-
 .../tools/cmdline/doccat/DoccatTrainerTool.java |   5 +-
 .../opennlp/tools/doccat/DoccatFactory.java     |  93 +----------------
 .../tools/doccat/DocumentCategorizer.java       |  54 ++--------
 .../doccat/DocumentCategorizerEvaluator.java    |   2 +-
 .../tools/doccat/DocumentCategorizerME.java     | 101 ++-----------------
 .../opennlp/tools/doccat/DocumentSample.java    |   6 --
 .../formats/LeipzigDoccatSampleStream.java      |  19 ++--
 .../tools/doccat/DocumentCategorizerMETest.java |  18 ++--
 .../tools/doccat/DocumentCategorizerNBTest.java |  17 ++--
 .../tools/doccat/DocumentSampleTest.java        |   4 +-
 .../doccat/AbstractDocumentCategorizer.java     |  29 +++---
 .../java/opennlp/uima/util/AnnotatorUtil.java   |   6 +-
 14 files changed, 66 insertions(+), 306 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
index f0f1712..a73aba7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
@@ -36,7 +36,6 @@ import opennlp.tools.doccat.DoccatEvaluationMonitor;
 import opennlp.tools.doccat.DoccatFactory;
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.doccat.FeatureGenerator;
-import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.eval.EvaluationMonitor;
 import opennlp.tools.util.model.ModelUtil;
 
@@ -84,16 +83,12 @@ public final class DoccatCrossValidatorTool extends
     FeatureGenerator[] featureGenerators = DoccatTrainerTool
         .createFeatureGenerators(params.getFeatureGenerators());
 
-    Tokenizer tokenizer = DoccatTrainerTool.createTokenizer(params
-        .getTokenizer());
-
     DoccatEvaluationMonitor[] listenersArr = listeners
         .toArray(new DoccatEvaluationMonitor[listeners.size()]);
 
     DoccatCrossValidator validator;
     try {
-      DoccatFactory factory = DoccatFactory.create(params.getFactory(),
-          tokenizer, featureGenerators);
+      DoccatFactory factory = DoccatFactory.create(params.getFactory(), featureGenerators);
       validator = new DoccatCrossValidator(params.getLang(), mlParams,
           factory, listenersArr);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
index a01d354..49a640c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
@@ -28,6 +28,7 @@ import opennlp.tools.cmdline.SystemInputStreamFactory;
 import opennlp.tools.doccat.DoccatModel;
 import opennlp.tools.doccat.DocumentCategorizerME;
 import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ParagraphStream;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -36,7 +37,7 @@ public class DoccatTool extends BasicCmdLineTool {
 
   @Override
   public String getShortDescription() {
-    return "learnable document categorizer";
+    return "learned document categorizer";
   }
 
   @Override
@@ -53,7 +54,7 @@ public class DoccatTool extends BasicCmdLineTool {
 
       DoccatModel model = new DoccatModelLoader().load(new File(args[0]));
 
-      DocumentCategorizerME doccat = new DocumentCategorizerME(model);
+      DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);
 
       /*
        * moved initialization to the try block to catch new IOException
@@ -68,10 +69,10 @@ public class DoccatTool extends BasicCmdLineTool {
             new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()));
         String document;
         while ((document = documentStream.read()) != null) {
-          String[] tokens = model.getFactory().getTokenizer().tokenize(document);
+          String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(document);
 
-          double[] prob = doccat.categorize(tokens);
-          String category = doccat.getBestCategory(prob);
+          double[] prob = documentCategorizerME.categorize(tokens);
+          String category = documentCategorizerME.getBestCategory(prob);
 
           DocumentSample sample = new DocumentSample(category, tokens);
           System.out.println(sample.toString());
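
With the String-taking categorize() overloads removed, callers tokenize first and pass the token array; a minimal sketch of the sequence used above (model is a loaded DoccatModel, the text literal is just an example):

  DocumentCategorizerME doccat = new DocumentCategorizerME(model);

  // the categorizer no longer carries its own tokenizer, so tokenize explicitly
  String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize("OpenNLP is a machine learning based toolkit .");

  double[] probs = doccat.categorize(tokens);
  String best = doccat.getBestCategory(probs);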

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
index 6ef5d88..8ebb5a8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
@@ -66,12 +66,9 @@ public class DoccatTrainerTool
     FeatureGenerator[] featureGenerators = createFeatureGenerators(params
         .getFeatureGenerators());
 
-    Tokenizer tokenizer = createTokenizer(params.getTokenizer());
-
     DoccatModel model;
     try {
-      DoccatFactory factory = DoccatFactory.create(params.getFactory(),
-          tokenizer, featureGenerators);
+      DoccatFactory factory = DoccatFactory.create(params.getFactory(), featureGenerators);
       model = DocumentCategorizerME.train(params.getLang(), sampleStream,
           mlParams, factory);
     } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
index a6c815b..babab7c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
@@ -22,8 +22,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ext.ExtensionLoader;
@@ -34,47 +32,17 @@ import opennlp.tools.util.ext.ExtensionLoader;
 public class DoccatFactory extends BaseToolFactory {
 
   private static final String FEATURE_GENERATORS = "doccat.featureGenerators";
-  private static final String TOKENIZER_NAME = "doccat.tokenizer";
 
   private FeatureGenerator[] featureGenerators;
-  private Tokenizer tokenizer;
 
   /**
    * Creates a {@link DoccatFactory} that provides the default implementation of
    * the resources.
    */
-  public DoccatFactory() {
-    this.tokenizer = WhitespaceTokenizer.INSTANCE;
-  }
+  public DoccatFactory() {}
 
   public DoccatFactory(final FeatureGenerator[] featureGenerators) {
-    this.tokenizer = WhitespaceTokenizer.INSTANCE;
-    this.featureGenerators = featureGenerators;
-  }
-
-  /**
-   * Creates a {@link DoccatFactory}. Use this constructor to programmatically
-   * create a factory.
-   *
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   * @param tokenizer         the tokenizer
-   * @param featureGenerators the feature generators
-   */
-  @Deprecated
-  public DoccatFactory(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
-    this.init(tokenizer, featureGenerators);
-  }
-
-  /**
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   * @param tokenizer the tokenizer
-   * @param featureGenerators feature generators
-   */
-  @Deprecated
-  protected void init(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
-
     this.featureGenerators = featureGenerators;
-    this.tokenizer = tokenizer;
   }
 
   protected void init(FeatureGenerator[] featureGenerators) {
@@ -85,11 +53,6 @@ public class DoccatFactory extends BaseToolFactory {
   public Map<String, String> createManifestEntries() {
     Map<String, String> manifestEntries = super.createManifestEntries();
 
-    if (getTokenizer() != null) {
-      manifestEntries.put(TOKENIZER_NAME, getTokenizer().getClass()
-          .getCanonicalName());
-    }
-
     if (getFeatureGenerators() != null) {
       manifestEntries.put(FEATURE_GENERATORS, featureGeneratorsAsString());
     }
@@ -115,31 +78,6 @@ public class DoccatFactory extends BaseToolFactory {
     // nothing to validate
   }
 
-  /**
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  public static DoccatFactory create(String subclassName, Tokenizer tokenizer,
-      FeatureGenerator[] featureGenerators) throws InvalidFormatException {
-    if (subclassName == null) {
-      // will create the default factory
-      return new DoccatFactory(tokenizer, featureGenerators);
-    }
-    try {
-      DoccatFactory theFactory = ExtensionLoader.instantiateExtension(
-          DoccatFactory.class, subclassName);
-      theFactory.init(tokenizer, featureGenerators);
-      return theFactory;
-    } catch (Exception e) {
-      String msg = "Could not instantiate the " + subclassName
-          + ". The initialization throw an exception.";
-      System.err.println(msg);
-      e.printStackTrace();
-      throw new InvalidFormatException(msg, e);
-    }
-
-  }
-
   public static DoccatFactory create(String subclassName, FeatureGenerator[] featureGenerators)
       throws InvalidFormatException {
     if (subclassName == null) {
@@ -192,33 +130,4 @@ public class DoccatFactory extends BaseToolFactory {
     this.featureGenerators = featureGenerators;
   }
 
-  /**
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  public Tokenizer getTokenizer() {
-    if (this.tokenizer == null) {
-      if (artifactProvider != null) {
-        String className = artifactProvider.getManifestProperty(TOKENIZER_NAME);
-        if (className != null) {
-          this.tokenizer = ExtensionLoader.instantiateExtension(
-              Tokenizer.class, className);
-        }
-      }
-      if (this.tokenizer == null) { // could not load using artifact provider
-        this.tokenizer = WhitespaceTokenizer.INSTANCE;
-      }
-    }
-    return tokenizer;
-  }
-
-  /**
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   * @param tokenizer tokenizer
-   */
-  @Deprecated
-  public void setTokenizer(Tokenizer tokenizer) {
-    this.tokenizer = tokenizer;
-  }
-
 }
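
Without the tokenizer parameter, a factory is now built from feature generators alone; a rough sketch of programmatic training in that style (samples and params are assumed to be an ObjectStream<DocumentSample> and TrainingParameters prepared elsewhere; BagOfWordsFeatureGenerator is one of the bundled generators):

  FeatureGenerator[] featureGenerators = { new BagOfWordsFeatureGenerator() };
  DoccatFactory factory = new DoccatFactory(featureGenerators);

  DoccatModel model = DocumentCategorizerME.train("en", samples, params, factory);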

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
index 88bf8f9..b180549 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
@@ -27,23 +27,21 @@ import java.util.SortedMap;
 public interface DocumentCategorizer {
 
   /**
-   * Categorizes the given text, provided in separate tokens.
+   * Categorize the given text provided as tokens along with
+   * the provided extra information
    *
    * @param text the tokens of text to categorize
+   * @param extraInformation extra information
    * @return per category probabilities
    */
-  double[] categorize(String[] text);
+  double[] categorize(String[] text, Map<String, Object> extraInformation);
 
   /**
    * Categorizes the given text, provided in separate tokens.
-   *
-   * @param text             the tokens of text to categorize
-   * @param extraInformation optional extra information to pass for evaluation
+   * @param text the tokens of text to categorize
    * @return per category probabilities
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
    */
-  @Deprecated
-  double[] categorize(String[] text, Map<String, Object> extraInformation);
+  double[] categorize(String[] text);
 
   /**
    * get the best category from previously generated outcome probabilities
@@ -77,25 +75,6 @@ public interface DocumentCategorizer {
   int getNumberOfCategories();
 
   /**
-   * categorize a piece of text
-   *
-   * @param documentText the text to categorize
-   * @return the probabilities of each category (sum up to 1)
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  double[] categorize(String documentText);
-
-  /**
-   * categorize a piece of text, providing extra metadata.
-   *
-   * @param documentText     the text to categorize
-   * @param extraInformation extra metadata
-   * @return the probabilities of each category (sum up to 1)
-   */
-  double[] categorize(String documentText, Map<String, Object> extraInformation);
-
-  /**
    * get the name of the category associated with the given probabilties
    *
    * @param results the probabilities of each category
@@ -108,16 +87,6 @@ public interface DocumentCategorizer {
    *
    * @param text the input text to classify
    * @return a map with the score as a key. The value is a Set of categories with the score.
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  Map<String, Double> scoreMap(String text);
-
-  /**
-   * Returns a map in which the key is the category name and the value is the score
-   *
-   * @param text the input text to classify
-   * @return a map with the score as a key. The value is a Set of categories with the score.
    */
   Map<String, Double> scoreMap(String[] text);
 
@@ -127,17 +96,6 @@ public interface DocumentCategorizer {
    *
    * @param text the input text to classify
    * @return a map with the score as a key. The value is a Set of categories with the score.
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  SortedMap<Double, Set<String>> sortedScoreMap(String text);
-
-  /**
-   * Get a map of the scores sorted in ascending aorder together with their associated categories.
-   * Many categories can have the same score, hence the Set as value
-   *
-   * @param text the input text to classify
-   * @return a map with the score as a key. The value is a Set of categories with the score.
    */
   SortedMap<Double, Set<String>> sortedScoreMap(String[] text);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
index 63e0768..c501280 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
@@ -59,7 +59,7 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
 
     String[] document = sample.getText();
 
-    double[] probs = categorizer.categorize(document, sample.getExtraInformation());
+    double[] probs = categorizer.categorize(document);
 
     String cat = categorizer.getBestCategory(probs);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index e743b9d..9dc41d7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -29,8 +29,6 @@ import java.util.TreeMap;
 import opennlp.tools.ml.EventTrainer;
 import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.TrainingParameters;
 
@@ -48,22 +46,6 @@ public class DocumentCategorizerME implements DocumentCategorizer {
   private DocumentCategorizerContextGenerator mContextGenerator;
 
   /**
-   * Initializes the current instance with a doccat model and custom feature
-   * generation. The feature generation must be identical to the configuration
-   * at training time.
-   *
-   * @param model             the doccat model
-   * @param featureGenerators the feature generators
-   * @deprecated train a {@link DoccatModel} with a specific
-   * {@link DoccatFactory} to customize the {@link FeatureGenerator}s
-   */
-  @Deprecated
-  public DocumentCategorizerME(DoccatModel model, FeatureGenerator... featureGenerators) {
-    this.model = model;
-    this.mContextGenerator = new DocumentCategorizerContextGenerator(featureGenerators);
-  }
-
-  /**
    * Initializes the current instance with a doccat model. Default feature
    * generation is used.
    *
@@ -75,6 +57,13 @@ public class DocumentCategorizerME implements DocumentCategorizer {
         .getFactory().getFeatureGenerators());
   }
 
+  /**
+   * Categorizes the given text, provided as tokens, along with
+   * the supplied extra information.
+   *
+   * @param text text tokens to categorize
+   * @param extraInformation additional information
+   */
   @Override
   public double[] categorize(String[] text, Map<String, Object> extraInformation) {
     return model.getMaxentModel().eval(
@@ -83,58 +72,15 @@ public class DocumentCategorizerME implements DocumentCategorizer {
 
   /**
    * Categorizes the given text.
+   *
    * @param text the text to categorize
    */
+  @Override
   public double[] categorize(String[] text) {
     return this.categorize(text, Collections.emptyMap());
   }
 
   /**
-   * Categorizes the given text. The Tokenizer is obtained from
-   * {@link DoccatFactory#getTokenizer()} and defaults to
-   * {@link SimpleTokenizer}.
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  @Override
-  public double[] categorize(String documentText,
-      Map<String, Object> extraInformation) {
-    Tokenizer tokenizer = model.getFactory().getTokenizer();
-    return categorize(tokenizer.tokenize(documentText), extraInformation);
-  }
-
-  /**
-   * Categorizes the given text. The text is tokenized with the SimpleTokenizer
-   * before it is passed to the feature generation.
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  public double[] categorize(String documentText) {
-    Tokenizer tokenizer = model.getFactory().getTokenizer();
-    return categorize(tokenizer.tokenize(documentText), Collections.emptyMap());
-  }
-
-  /**
-   * Returns a map in which the key is the category name and the value is the score
-   *
-   * @param text the input text to classify
-   * @return the score map
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  public Map<String, Double> scoreMap(String text) {
-    Map<String, Double> probDist = new HashMap<>();
-
-    double[] categorize = categorize(text);
-    int catSize = getNumberOfCategories();
-    for (int i = 0; i < catSize; i++) {
-      String category = getCategory(i);
-      probDist.put(category, categorize[getIndex(category)]);
-    }
-    return probDist;
-  }
-
-  /**
    * Returns a map in which the key is the category name and the value is the score
    *
    * @param text the input text to classify
@@ -160,35 +106,6 @@ public class DocumentCategorizerME implements DocumentCategorizer {
    *
    * @param text the input text to classify
    * @return the sorted score map
-   * @deprecated will be removed after 1.7.1 release. Don't use it.
-   */
-  @Deprecated
-  @Override
-  public SortedMap<Double, Set<String>> sortedScoreMap(String text) {
-    SortedMap<Double, Set<String>> descendingMap = new TreeMap<>();
-    double[] categorize = categorize(text);
-    int catSize = getNumberOfCategories();
-    for (int i = 0; i < catSize; i++) {
-      String category = getCategory(i);
-      double score = categorize[getIndex(category)];
-      if (descendingMap.containsKey(score)) {
-        descendingMap.get(score).add(category);
-      } else {
-        Set<String> newset = new HashSet<>();
-        newset.add(category);
-        descendingMap.put(score, newset);
-      }
-    }
-    return descendingMap;
-  }
-
-  /**
-   * Returns a map with the score as a key in ascending order.
-   * The value is a Set of categories with the score.
-   * Many categories can have the same score, hence the Set as value
-   *
-   * @param text the input text to classify
-   * @return the sorted score map
    */
   @Override
   public SortedMap<Double, Set<String>> sortedScoreMap(String[] text) {
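
Callers that previously passed raw document text to categorize(String) now tokenize it themselves before calling the token-based method. A small migration sketch; doccat and documentText are assumed to be in scope, and the tokenizer choice here is an assumption, the only requirement being that it matches the tokenization used at training time:

    // before (removed): double[] probs = doccat.categorize(documentText);
    Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;  // or SimpleTokenizer.INSTANCE
    String[] tokens = tokenizer.tokenize(documentText);
    double[] probs = doccat.categorize(tokens);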

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
index 3d107fa..adddc27 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
@@ -24,8 +24,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
 /**
  * Class which holds a classified document and its category.
  */
@@ -35,10 +33,6 @@ public class DocumentSample {
   private final List<String> text;
   private final Map<String, Object> extraInformation;
 
-  public DocumentSample(String category, String text) {
-    this(category, WhitespaceTokenizer.INSTANCE.tokenize(text));
-  }
-
   public DocumentSample(String category, String[] text) {
     this(category, text, null);
   }
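
DocumentSample no longer tokenizes for you either, so training data is handed over as a token array. A minimal sketch, assuming whitespace tokenization is acceptable for the corpus (the category name and sentence are illustrative):

    // before (removed): new DocumentSample("sports", "The match ended in a draw");
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize("The match ended in a draw");
    DocumentSample sample = new DocumentSample("sports", tokens);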

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
index 1ca0484..8ed0036 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
@@ -20,6 +20,9 @@ package opennlp.tools.formats;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.tokenize.SimpleTokenizer;
@@ -36,7 +39,7 @@ import opennlp.tools.util.PlainTextByLineStream;
  * <p>
  * The input text is tokenized with the {@link SimpleTokenizer}. The input text classified
  * by the language model must also be tokenized by the {@link SimpleTokenizer} to produce
- * exactly the same tokenization during testing and training.
+ * exactly the same tokenization during testing and training.�
  */
 public class LeipzigDoccatSampleStream extends
     FilterObjectStream<String, DocumentSample> {
@@ -79,10 +82,8 @@ public class LeipzigDoccatSampleStream extends
   }
 
   public DocumentSample read() throws IOException {
-
     int count = 0;
-
-    StringBuilder sampleText = new StringBuilder();
+    List<String> tokensList = new ArrayList<>();
 
     String line;
     while (count < sentencesPerDocument && (line = samples.read()) != null) {
@@ -94,17 +95,13 @@ public class LeipzigDoccatSampleStream extends
       }
 
       // Always skip first token, that is the sentence number!
-      for (int i = 1; i < tokens.length; i++) {
-        sampleText.append(tokens[i]);
-        sampleText.append(' ');
-      }
+      tokensList.addAll(Arrays.asList(tokens).subList(1, tokens.length));
 
       count++;
     }
 
-
-    if (sampleText.length() > 0) {
-      return new DocumentSample(language, sampleText.toString());
+    if (tokensList.size() > 0) {
+      return new DocumentSample(language, tokensList.toArray(new String[tokensList.size()]));
     }
 
     return null;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
index 6389530..220df87 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
@@ -42,27 +42,23 @@ public class DocumentCategorizerMETest {
         new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
 
     TrainingParameters params = new TrainingParameters();
-    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
-    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+    params.put(TrainingParameters.ITERATIONS_PARAM, "100");
+    params.put(TrainingParameters.CUTOFF_PARAM, "0");
 
     DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
             params, new DoccatFactory());
 
     DocumentCategorizer doccat = new DocumentCategorizerME(model);
 
-    double[] aProbs = doccat.categorize("a");
+    double[] aProbs = doccat.categorize(new String[]{"a"});
     Assert.assertEquals("1", doccat.getBestCategory(aProbs));
 
-    double[] bProbs = doccat.categorize("x");
+    double[] bProbs = doccat.categorize(new String[]{"x"});
     Assert.assertEquals("0", doccat.getBestCategory(bProbs));
 
     //test to make sure sorted map's last key is cat 1 because it has the highest score.
-    SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap("a");
-    for (String cat : sortedScoreMap.get(sortedScoreMap.lastKey())) {
-      Assert.assertEquals("1", cat);
-      break;
-    }
-    System.out.println("");
-
+    SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
+    Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
+    Assert.assertEquals(1, cat.size());
   }
 }
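
For reference, the end-to-end training flow this test exercises, written out as a sketch (the samples, category names and parameter values are illustrative, and ObjectStreamUtils.createObjectStream from opennlp.tools.util is assumed to be how the sample stream is built, since that part is not shown in this hunk):

    ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream(
        new DocumentSample("1", new String[] {"a", "b", "c", "1", "2"}),
        new DocumentSample("0", new String[] {"x", "y", "z", "7", "8"}));

    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, "100");
    params.put(TrainingParameters.CUTOFF_PARAM, "0");

    DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
        params, new DoccatFactory());

    DocumentCategorizer doccat = new DocumentCategorizerME(model);
    double[] probs = doccat.categorize(new String[] {"a"});
    String best = doccat.getBestCategory(probs);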

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
index de3f098..0847690 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
@@ -44,8 +44,8 @@ public class DocumentCategorizerNBTest {
         new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}));
 
     TrainingParameters params = new TrainingParameters();
-    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
-    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+    params.put(TrainingParameters.ITERATIONS_PARAM, "100");
+    params.put(TrainingParameters.CUTOFF_PARAM, "0");
     params.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
 
     DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
@@ -53,19 +53,16 @@ public class DocumentCategorizerNBTest {
 
     DocumentCategorizer doccat = new DocumentCategorizerME(model);
 
-    double[] aProbs = doccat.categorize("a");
+    double[] aProbs = doccat.categorize(new String[]{"a"});
     Assert.assertEquals("1", doccat.getBestCategory(aProbs));
 
-    double[] bProbs = doccat.categorize("x");
+    double[] bProbs = doccat.categorize(new String[]{"x"});
     Assert.assertEquals("0", doccat.getBestCategory(bProbs));
 
     //test to make sure sorted map's last key is cat 1 because it has the highest score.
-    SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap("a");
-    for (String cat : sortedScoreMap.get(sortedScoreMap.lastKey())) {
-      Assert.assertEquals("1", cat);
-      break;
-    }
-    System.out.println("");
+    SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
+    Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
+    Assert.assertEquals(1, cat.size());
 
   }
 }
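
The Naive Bayes variant of the test differs only in trainer selection; switching a doccat training run from the default maxent trainer to Naive Bayes is a single additional parameter, exactly as the diff above shows:

    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, "100");
    params.put(TrainingParameters.CUTOFF_PARAM, "0");
    // selects the Naive Bayes trainer; both constants are taken from the hunk above
    params.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);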

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
index 232158b..8cf8fef 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentSampleTest.java
@@ -31,11 +31,11 @@ public class DocumentSampleTest {
   }
 
   public static DocumentSample createGoldSample() {
-    return new DocumentSample("aCategory", "a small text");
+    return new DocumentSample("aCategory", new String[]{"a", "small", "text"});
   }
 
   public static DocumentSample createPredSample() {
-    return new DocumentSample("anotherCategory", "a small text");
+    return new DocumentSample("anotherCategory", new String[]{"a", "small", "text"});
   }
 
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index db9c075..4b49dca 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -17,12 +17,17 @@
 
 package opennlp.uima.doccat;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.resource.ResourceAccessException;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.util.Level;
@@ -72,29 +77,25 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
     mCategorizer = new DocumentCategorizerME(model);
   }
 
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws AnalysisEngineProcessException {
+  public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
     mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
+        UimaUtil.TOKEN_TYPE_PARAMETER);
   }
 
   protected abstract void setBestCategory(CAS cas, String bestCategory);
 
   public void process(CAS cas) {
 
-    double[] result;
-
-    if (mTokenType != null) {
-      // TODO:
-      // count tokens
-      // create token array
-      // pass array to doccat
-      // create result annotation
-      result = mCategorizer.categorize(cas.getDocumentText());
-    } else {
-      result = mCategorizer.categorize(cas.getDocumentText());
+    FSIterator<AnnotationFS> tokenAnnotations = cas.getAnnotationIndex(mTokenType).iterator();
+    List<String> tokensList = new ArrayList<>();
+
+    while (tokenAnnotations.hasNext()) {
+      tokensList.add(tokenAnnotations.next().getCoveredText());
     }
 
+    double[] result =
+        mCategorizer.categorize(tokensList.toArray(new String[tokensList.size()]));
+
     String bestCategory = mCategorizer.getBestCategory(result);
 
     setBestCategory(cas, bestCategory);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/76609f5c/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
index 8847107..730d6be 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
@@ -329,8 +329,7 @@ public final class AnnotatorUtil {
     } else {
       throw new ResourceInitializationException(
           ExceptionMessages.MESSAGE_CATALOG,
-          ExceptionMessages.WRONG_PARAMETER_TYPE, new Object[] {parameter,
-          "String array"});
+          ExceptionMessages.WRONG_PARAMETER_TYPE, new Object[] {parameter, "String array"});
     }
   }
 
@@ -443,8 +442,7 @@ public final class AnnotatorUtil {
     if (inResource == null) {
       throw new ResourceInitializationException(
           ExceptionMessages.MESSAGE_CATALOG,
-          ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {name
-          + " could not be found!"});
+          ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {name + " could not be found!"});
     }
 
     return inResource;


[16/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule

Posted by jo...@apache.org.
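
The bulk of this commit is mechanical: C-style array declarations are rewritten to the Java-style form, and a checkstyle rule keeps new violations out (presumably checkstyle's ArrayTypeStyle check; the exact rule name is an assumption, since the checkstyle configuration is not part of this excerpt). In short:

    // C-style declaration, flagged after this commit:
    //   String toks[] = new String[10];
    // Java-style declaration, used throughout the codebase from here on:
    String[] toks = new String[10];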
http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
index 1e90ecc..259d9f4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
@@ -37,8 +37,8 @@ public class PosSampleStream extends FilterObjectStream<Parse, POSSample> {
 
       Parse[] nodes = parse.getTagNodes();
 
-      String toks[] = new String[nodes.length];
-      String preds[] = new String[nodes.length];
+      String[] toks = new String[nodes.length];
+      String[] preds = new String[nodes.length];
 
       for (int ti = 0; ti < nodes.length; ti++) {
         Parse tok = nodes[ti];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index f103450..5f5eb25 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -99,7 +99,7 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary {
     return dictionary.keySet().iterator();
   }
 
-  private static String tagsToString(String tags[]) {
+  private static String tagsToString(String[] tags) {
 
     StringBuilder tagString = new StringBuilder();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
index 26cb79c..eaf6baf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSEvaluator.java
@@ -57,8 +57,8 @@ public class POSEvaluator extends Evaluator<POSSample> {
   @Override
   protected POSSample processSample(POSSample reference) {
 
-    String predictedTags[] = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
-    String referenceTags[] = reference.getTags();
+    String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
+    String[] referenceTags = reference.getTags();
 
     for (int i = 0; i < referenceTags.length; i++) {
       if (referenceTags[i].equals(predictedTags[i])) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
index b1b2d32..9512e38 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java
@@ -37,7 +37,7 @@ public class POSSample {
 
   private final String[][] additionalContext;
 
-  public POSSample(String sentence[], String tags[]) {
+  public POSSample(String[] sentence, String[] tags) {
     this(sentence, tags, null);
   }
 
@@ -66,7 +66,7 @@ public class POSSample {
     this.additionalContext = ac;
   }
 
-  public POSSample(String sentence[], String tags[],
+  public POSSample(String[] sentence, String[] tags,
       String[][] additionalContext) {
     this(Arrays.asList(sentence), Arrays.asList(tags), additionalContext);
   }
@@ -120,10 +120,10 @@ public class POSSample {
 
   public static POSSample parse(String sentenceString) throws InvalidFormatException {
 
-    String tokenTags[] = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
+    String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
 
-    String sentence[] = new String[tokenTags.length];
-    String tags[] = new String[tokenTags.length];
+    String[] sentence = new String[tokenTags.length];
+    String[] tags = new String[tokenTags.length];
 
     for (int i = 0; i < tokenTags.length; i++) {
       int split = tokenTags[i].lastIndexOf("_");
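
The parse method shown here splits whitespace-separated word_TAG pairs on the last underscore, so a usage sketch looks like this (the sentence is illustrative; POSSample.parse declares InvalidFormatException):

    POSSample sample = POSSample.parse("The_DT dog_NN barks_VBZ");
    String[] words = sample.getSentence();  // {"The", "dog", "barks"}
    String[] tags = sample.getTags();       // {"DT", "NN", "VBZ"}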

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
index 4dd31e0..aa3c99d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java
@@ -63,9 +63,9 @@ public class POSSampleEventStream extends AbstractEventStream<POSSample> {
 
   @Override
   protected Iterator<Event> createEvents(POSSample sample) {
-    String sentence[] = sample.getSentence();
-    String tags[] = sample.getTags();
-    Object ac[] = sample.getAddictionalContext();
+    String[] sentence = sample.getSentence();
+    String[] tags = sample.getTags();
+    Object[] ac = sample.getAddictionalContext();
     List<Event> events = generateEvents(sentence, tags, ac, cg);
     return events.iterator();
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
index b81fc48..9942d67 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java
@@ -59,8 +59,8 @@ public class POSSampleSequenceStream implements SequenceStream {
     POSSample sample = psi.read();
 
     if (sample != null) {
-      String sentence[] = sample.getSentence();
-      String tags[] = sample.getTags();
+      String[] sentence = sample.getSentence();
+      String[] tags = sample.getTags();
       Event[] events = new Event[sentence.length];
 
       for (int i = 0; i < sentence.length; i++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index 91bda34..75d0ec0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -28,14 +28,14 @@ import java.util.List;
  */
 public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
 
-  private char eosCharacters[];
+  private char[] eosCharacters;
 
   /**
    * Initializes the current instance.
    *
    * @param eosCharacters
    */
-  public DefaultEndOfSentenceScanner(char eosCharacters[]) {
+  public DefaultEndOfSentenceScanner(char[] eosCharacters) {
     this.eosCharacters = eosCharacters;
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
index b246327..3effda8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java
@@ -51,8 +51,8 @@ public class SentenceDetectorEvaluator extends Evaluator<SentenceSample> {
     this.sentenceDetector = sentenceDetector;
   }
 
-  private Span[] trimSpans(String document, Span spans[]) {
-    Span trimedSpans[] = new Span[spans.length];
+  private Span[] trimSpans(String document, Span[] spans) {
+    Span[] trimedSpans = new Span[spans.length];
 
     for (int i = 0; i < spans.length; i++) {
       trimedSpans[i] = spans[i].trim(document);
@@ -63,7 +63,7 @@ public class SentenceDetectorEvaluator extends Evaluator<SentenceSample> {
 
   @Override
   protected SentenceSample processSample(SentenceSample sample) {
-    Span predictions[] =
+    Span[] predictions =
         trimSpans(sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument()));
     Span[] references = trimSpans(sample.getDocument(), sample.getSentences());
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index 2f3fd6c..b5ad804 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -128,7 +128,7 @@ public class SentenceDetectorME implements SentenceDetector {
    */
   public String[] sentDetect(String s) {
     Span[] spans = sentPosDetect(s);
-    String sentences[];
+    String[] sentences;
     if (spans.length != 0) {
       sentences = new String[spans.length];
       for (int si = 0; si < spans.length; si++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
index 1f1b79a..dbbd193 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
@@ -53,7 +53,7 @@ public class SentenceSample {
 
     StringBuilder documentBuilder = new StringBuilder();
 
-    for (String sentenceTokens[] : sentences) {
+    for (String[] sentenceTokens : sentences) {
 
       String sampleSentence = detokenizer.detokenize(sentenceTokens, null);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
index 9ffe649..55eca1c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java
@@ -87,8 +87,8 @@ public class DetokenizationDictionary {
    * @param operations an array of operations which specifies which operation
    *        should be used for the provided tokens
    */
-  public DetokenizationDictionary(String tokens[],
-      DetokenizationDictionary.Operation operations[]) {
+  public DetokenizationDictionary(String[] tokens,
+      DetokenizationDictionary.Operation[] operations) {
     if (tokens.length != operations.length)
       throw new IllegalArgumentException("tokens and ops must have the same length: tokens=" +
           tokens.length + ", operations=" + operations.length + "!");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
index 3af8597..acb9f45 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Detokenizer.java
@@ -57,7 +57,7 @@ public interface Detokenizer {
    * @param tokens the tokens to detokenize.
    * @return the merge operations to detokenize the input tokens.
    */
-  DetokenizationOperation[] detokenize(String tokens[]);
+  DetokenizationOperation[] detokenize(String[] tokens);
 
   /**
    * Detokenize the input tokens into a String. Tokens which
@@ -69,5 +69,5 @@ public interface Detokenizer {
    *
    * @return the concatenated tokens
    */
-  String detokenize(String tokens[], String splitMarker);
+  String detokenize(String[] tokens, String splitMarker);
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
index 33eab9e..d53eefa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java
@@ -37,7 +37,7 @@ public class DictionaryDetokenizer implements Detokenizer {
 
   public DetokenizationOperation[] detokenize(String[] tokens) {
 
-    DetokenizationOperation operations[] = new DetokenizationOperation[tokens.length];
+    DetokenizationOperation[] operations = new DetokenizationOperation[tokens.length];
 
     Set<String> matchingTokens = new HashSet<>();
 
@@ -79,9 +79,9 @@ public class DictionaryDetokenizer implements Detokenizer {
     return operations;
   }
 
-  public String detokenize(String tokens[], String splitMarker) {
+  public String detokenize(String[] tokens, String splitMarker) {
 
-    DetokenizationOperation operations[] = detokenize(tokens);
+    DetokenizationOperation[] operations = detokenize(tokens);
 
     if (tokens.length != operations.length)
       throw new IllegalArgumentException("tokens and operations array must have same length: tokens=" +

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
index dd9745e..39b8a80 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokSpanEventStream.java
@@ -99,7 +99,7 @@ public class TokSpanEventStream extends AbstractEventStream<TokenSample> {
 
     List<Event> events = new ArrayList<>(50);
 
-    Span tokens[] = tokenSample.getTokenSpans();
+    Span[] tokens = tokenSample.getTokenSpans();
     String text = tokenSample.getText();
 
     if (tokens.length > 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
index f895e7d..3ec3b8d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java
@@ -45,7 +45,7 @@ public class TokenSample {
    * @param text the text which contains the tokens.
    * @param tokenSpans the spans which mark the begin and end of the tokens.
    */
-  public TokenSample(String text, Span tokenSpans[]) {
+  public TokenSample(String text, Span[] tokenSpans) {
     Objects.requireNonNull(tokenSpans, "tokenSpans must not be null");
 
     this.text = Objects.requireNonNull(text, "text must not be null");
@@ -60,7 +60,7 @@ public class TokenSample {
     }
   }
 
-  public TokenSample(Detokenizer detokenizer, String tokens[]) {
+  public TokenSample(Detokenizer detokenizer, String[] tokens) {
 
     StringBuilder sentence = new StringBuilder();
 
@@ -159,7 +159,7 @@ public class TokenSample {
     Objects.requireNonNull(sampleString, "sampleString must not be null");
     Objects.requireNonNull(separatorChars, "separatorChars must not be null");
 
-    Span whitespaceTokenSpans[] = WhitespaceTokenizer.INSTANCE.tokenizePos(sampleString);
+    Span[] whitespaceTokenSpans = WhitespaceTokenizer.INSTANCE.tokenizePos(sampleString);
 
     // Pre-allocate 20% for newly created tokens
     List<Span> realTokenSpans = new ArrayList<>((int) (whitespaceTokenSpans.length * 1.2d));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
index b70898a..fa4d35b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluator.java
@@ -55,7 +55,7 @@ public class TokenizerEvaluator extends Evaluator<TokenSample> {
 
   @Override
   protected TokenSample processSample(TokenSample reference) {
-    Span predictions[] = tokenizer.tokenizePos(reference.getText());
+    Span[] predictions = tokenizer.tokenizePos(reference.getText());
     fmeasure.updateScores(reference.getTokenSpans(), predictions);
 
     return new TokenSample(reference.getText(), predictions);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
index 2feb26d..bfb87c5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
@@ -41,7 +41,7 @@ public class TokenizerStream implements ObjectStream<TokenSample> {
     String inputString = input.read();
 
     if (inputString != null) {
-      Span tokens[] = tokenizer.tokenizePos(inputString);
+      Span[] tokens = tokenizer.tokenizePos(inputString);
 
       return new TokenSample(inputString, tokens);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java b/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
index 934cbac..c6c474d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
@@ -38,7 +38,7 @@ public interface SequenceCodec<T> {
    *
    * @return
    */
-  T[] encode(Span names[], int length);
+  T[] encode(Span[] names, int length);
 
   /**
    * Creates a sequence validator which can validate a sequence of outcomes.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
index 5736cf4..e589c93 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
@@ -27,7 +27,7 @@ import java.util.Objects;
  */
 public class StringList implements Iterable<String> {
 
-  private String tokens[];
+  private String[] tokens;
 
   /**
    * Initializes the current instance.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
index 6682ec0..3ed769b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
@@ -73,7 +73,7 @@ public class StringUtil {
    * @return lower cased String
    */
   public static String toLowerCase(CharSequence string) {
-    char lowerCaseChars[] = new char[string.length()];
+    char[] lowerCaseChars = new char[string.length()];
 
     for (int i = 0; i < string.length(); i++) {
       lowerCaseChars[i] = Character.toLowerCase(string.charAt(i));
@@ -91,7 +91,7 @@ public class StringUtil {
    * @return upper cased String
    */
   public static String toUpperCase(CharSequence string) {
-    char upperCaseChars[] = new char[string.length()];
+    char[] upperCaseChars = new char[string.length()];
 
     for (int i = 0; i < string.length(); i++) {
       upperCaseChars[i] = Character.toUpperCase(string.charAt(i));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
index efcfce4..e7cf25c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
@@ -21,7 +21,7 @@ import java.util.List;
 
 public class DocumentBeginFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private String firstSentence[];
+  private String[] firstSentence;
 
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
index 8cee38e..da67204 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java
@@ -33,9 +33,9 @@ public class InSpanGenerator implements AdaptiveFeatureGenerator {
 
   private final TokenNameFinder finder;
 
-  private String currentSentence[];
+  private String[] currentSentence;
 
-  private Span currentNames[];
+  private Span[] currentNames;
 
   /**
    * Initializes the current instance.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
index 11b5f97..93b2122 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
@@ -56,7 +56,7 @@ public class WordClusterDictionary implements SerializableArtifact {
     BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
     String line;
     while ((line = reader.readLine()) != null) {
-      String parts[] = line.split(" ");
+      String[] parts = line.split(" ");
       if (parts.length == 3) {
         tokenToClusterMap.put(parts[0], parts[1].intern());
       } else if (parts.length == 2) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
index 85f6e12..bcba6ea 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java
@@ -116,7 +116,7 @@ public final class ModelUtil {
     ByteArrayOutputStream byteArrayOut = new ByteArrayOutputStream();
 
     int length;
-    byte buffer[] = new byte[1024];
+    byte[] buffer = new byte[1024];
     while ((length = in.read(buffer)) > 0) {
       byteArrayOut.write(buffer, 0, length);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
index c939442..51112df 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
@@ -85,7 +85,7 @@ public class ChunkerMETest {
   @Test
   public void testChunkAsArray() throws Exception {
 
-    String preds[] = chunker.chunk(toks1, tags1);
+    String[] preds = chunker.chunk(toks1, tags1);
 
     Assert.assertArrayEquals(expect1, preds);
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
index c4b0516..b214b26 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java
@@ -120,7 +120,7 @@ public class ArgumentParserTest {
   @Test
   public void testSimpleArgumentsUsage() {
 
-    String arguments[] = new String[] {"-encoding charset",
+    String[] arguments = new String[] {"-encoding charset",
         "[-iterations num]",
         "[-alphaNumOpt true|false]"};
 
@@ -144,7 +144,7 @@ public class ArgumentParserTest {
   @Test
   public void testDefaultEncodingParameter() {
 
-    String args[] = "-something aValue".split(" ");
+    String[] args = "-something aValue".split(" ");
     Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
 
     ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);
@@ -162,7 +162,7 @@ public class ArgumentParserTest {
       }
     }
 
-    String args[] = ("-something aValue -encoding " + notTheDefaultCharset).split(" ");
+    String[] args = ("-something aValue -encoding " + notTheDefaultCharset).split(" ");
     Assert.assertTrue(ArgumentParser.validateArguments(args, ExtendsEncodingParameter.class));
 
     ExtendsEncodingParameter params = ArgumentParser.parse(args, ExtendsEncodingParameter.class);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
index d569e74..6389530 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java
@@ -50,10 +50,10 @@ public class DocumentCategorizerMETest {
 
     DocumentCategorizer doccat = new DocumentCategorizerME(model);
 
-    double aProbs[] = doccat.categorize("a");
+    double[] aProbs = doccat.categorize("a");
     Assert.assertEquals("1", doccat.getBestCategory(aProbs));
 
-    double bProbs[] = doccat.categorize("x");
+    double[] bProbs = doccat.categorize("x");
     Assert.assertEquals("0", doccat.getBestCategory(bProbs));
 
     //test to make sure sorted map's last key is cat 1 because it has the highest score.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
index ae97840..de3f098 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
@@ -53,10 +53,10 @@ public class DocumentCategorizerNBTest {
 
     DocumentCategorizer doccat = new DocumentCategorizerME(model);
 
-    double aProbs[] = doccat.categorize("a");
+    double[] aProbs = doccat.categorize("a");
     Assert.assertEquals("1", doccat.getBestCategory(aProbs));
 
-    double bProbs[] = doccat.categorize("x");
+    double[] bProbs = doccat.categorize("x");
     Assert.assertEquals("0", doccat.getBestCategory(bProbs));
 
     //test to make sure sorted map's last key is cat 1 because it has the highest score.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
index 2382375..809d785 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ConllXPOSSampleStreamTest.java
@@ -41,8 +41,8 @@ public class ConllXPOSSampleStreamTest {
 
     POSSample a = sampleStream.read();
 
-    String aSentence[] = a.getSentence();
-    String aTags[] = a.getTags();
+    String[] aSentence = a.getSentence();
+    String[] aTags = a.getTags();
 
     assertEquals(22, aSentence.length);
     assertEquals(22, aTags.length);
@@ -115,8 +115,8 @@ public class ConllXPOSSampleStreamTest {
 
     POSSample b = sampleStream.read();
 
-    String bSentence[] = b.getSentence();
-    String bTags[] = b.getTags();
+    String[] bSentence = b.getSentence();
+    String[] bTags = b.getTags();
 
     assertEquals(12, bSentence.length);
     assertEquals(12, bTags.length);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
index fbda898..8fa31be 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java
@@ -30,7 +30,7 @@ import opennlp.tools.util.ObjectStreamUtils;
 
 public class ConstitParseSampleStreamTest {
 
-  private String sample1Tokens[] = new String[]{
+  private String[] sample1Tokens = new String[]{
       "L'",
       "autonomie",
       "de",
@@ -88,7 +88,7 @@ public class ConstitParseSampleStreamTest {
   private static byte[] getSample1() throws IOException {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
 
-    byte buffer[] = new byte[1024];
+    byte[] buffer = new byte[1024];
     int length;
     try (InputStream sampleIn =
         ConstitParseSampleStreamTest.class.getResourceAsStream("sample1.xml")) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
index 92528e7..46d0440 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java
@@ -33,7 +33,7 @@ public class BeamSearchTest {
 
     private String[] outcomeSequence;
 
-    IdentityFeatureGenerator(String outcomeSequence[]) {
+    IdentityFeatureGenerator(String[] outcomeSequence) {
       this.outcomeSequence = outcomeSequence;
     }
 
@@ -53,7 +53,7 @@ public class BeamSearchTest {
     private double bestOutcomeProb = 0.8d;
     private double otherOutcomeProb;
 
-    IdentityModel(String outcomes[]) {
+    IdentityModel(String[] outcomes) {
       this.outcomes = outcomes;
 
       for (int i = 0; i < outcomes.length; i++) {
@@ -65,7 +65,7 @@ public class BeamSearchTest {
 
     public double[] eval(String[] context) {
 
-      double probs[] = new double[outcomes.length];
+      double[] probs = new double[outcomes.length];
 
       for (int i = 0; i < probs.length; i++) {
         if (outcomes[i].equals(context[0])) {
@@ -118,10 +118,10 @@ public class BeamSearchTest {
   @Test
   public void testBestSequenceZeroLengthInput() {
 
-    String sequence[] = new String[0];
+    String[] sequence = new String[0];
     BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
 
-    String outcomes[] = new String[] {"1", "2", "3"};
+    String[] outcomes = new String[] {"1", "2", "3"};
     MaxentModel model = new IdentityModel(outcomes);
 
     BeamSearch<String> bs = new BeamSearch<>(3, model);
@@ -138,10 +138,10 @@ public class BeamSearchTest {
    */
   @Test
   public void testBestSequenceOneElementInput() {
-    String sequence[] = {"1"};
+    String[] sequence = {"1"};
     BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
 
-    String outcomes[] = new String[] {"1", "2", "3"};
+    String[] outcomes = new String[] {"1", "2", "3"};
     MaxentModel model = new IdentityModel(outcomes);
 
     BeamSearch<String> bs = new BeamSearch<>(3, model);
@@ -160,10 +160,10 @@ public class BeamSearchTest {
    */
   @Test
   public void testBestSequence() {
-    String sequence[] = {"1", "2", "3", "2", "1"};
+    String[] sequence = {"1", "2", "3", "2", "1"};
     BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
 
-    String outcomes[] = new String[] {"1", "2", "3"};
+    String[] outcomes = new String[] {"1", "2", "3"};
     MaxentModel model = new IdentityModel(outcomes);
 
     BeamSearch<String> bs = new BeamSearch<>(2, model);
@@ -186,10 +186,10 @@ public class BeamSearchTest {
    */
   @Test
   public void testBestSequenceWithValidator() {
-    String sequence[] = {"1", "2", "3", "2", "1"};
+    String[] sequence = {"1", "2", "3", "2", "1"};
     BeamSearchContextGenerator<String> cg = new IdentityFeatureGenerator(sequence);
 
-    String outcomes[] = new String[] {"1", "2", "3"};
+    String[] outcomes = new String[] {"1", "2", "3"};
     MaxentModel model = new IdentityModel(outcomes);
 
     BeamSearch<String> bs = new BeamSearch<>(2, model, 0);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
index 7599551..08d0f97 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
@@ -59,8 +59,8 @@ public class DictionaryNameFinderTest {
   public void testSingleTokeNameAtSentenceStart() {
     String sentence = "Max a b c d";
     SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
-    String tokens[] = tokenizer.tokenize(sentence);
-    Span names[] = mNameFinder.find(tokens);
+    String[] tokens = tokenizer.tokenize(sentence);
+    Span[] names = mNameFinder.find(tokens);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 0 && names[0].getEnd() == 1);
   }
@@ -69,8 +69,8 @@ public class DictionaryNameFinderTest {
   public void testSingleTokeNameInsideSentence() {
     String sentence = "a b  Max c d";
     SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
-    String tokens[] = tokenizer.tokenize(sentence);
-    Span names[] = mNameFinder.find(tokens);
+    String[] tokens = tokenizer.tokenize(sentence);
+    Span[] names = mNameFinder.find(tokens);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 2 && names[0].getEnd() == 3);
   }
@@ -80,40 +80,40 @@ public class DictionaryNameFinderTest {
     String sentence = "a b c Max";
 
     SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
-    String tokens[] = tokenizer.tokenize(sentence);
-    Span names[] = mNameFinder.find(tokens);
+    String[] tokens = tokenizer.tokenize(sentence);
+    Span[] names = mNameFinder.find(tokens);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
   }
 
   @Test
   public void testLastMatchingTokenNameIsChoosen() {
-    String sentence[] = {"a", "b", "c", "Vanessa"};
-    Span names[] = mNameFinder.find(sentence);
+    String[] sentence = {"a", "b", "c", "Vanessa"};
+    Span[] names = mNameFinder.find(sentence);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4);
   }
 
   @Test
   public void testLongerTokenNameIsPreferred() {
-    String sentence[] = {"a", "b", "c", "Vanessa", "Williams"};
-    Span names[] = mNameFinder.find(sentence);
+    String[] sentence = {"a", "b", "c", "Vanessa", "Williams"};
+    Span[] names = mNameFinder.find(sentence);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
   }
 
   @Test
   public void testCaseSensitivity() {
-    String sentence[] = {"a", "b", "c", "vanessa", "williams"};
-    Span names[] = mNameFinder.find(sentence);
+    String[] sentence = {"a", "b", "c", "vanessa", "williams"};
+    Span[] names = mNameFinder.find(sentence);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5);
   }
 
   @Test
   public void testCaseLongerEntry() {
-    String sentence[] = {"a", "b", "michael", "jordan"};
-    Span names[] = mNameFinder.find(sentence);
+    String[] sentence = {"a", "b", "michael", "jordan"};
+    Span[] names = mNameFinder.find(sentence);
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].length() == 2);
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index 494af62..c258d07 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -78,7 +78,7 @@ public class NameFinderMETest {
 
     // now test if it can detect the sample sentences
 
-    String sentence[] = {"Alisa",
+    String[] sentence = {"Alisa",
         "appreciated",
         "the",
         "hint",
@@ -89,7 +89,7 @@ public class NameFinderMETest {
         "traditional",
         "meal."};
 
-    Span names[] = nameFinder.find(sentence);
+    Span[] names = nameFinder.find(sentence);
 
     Assert.assertEquals(1, names.length);
     Assert.assertEquals(new Span(0, 1, DEFAULT), names[0]);
@@ -150,10 +150,10 @@ public class NameFinderMETest {
     Assert.assertEquals("person", names2[0].getType());
     Assert.assertEquals("person", names2[1].getType());
 
-    String sentence[] = { "Alisa", "appreciated", "the", "hint", "and",
+    String[] sentence = { "Alisa", "appreciated", "the", "hint", "and",
         "enjoyed", "a", "delicious", "traditional", "meal." };
 
-    Span names[] = nameFinder.find(sentence);
+    Span[] names = nameFinder.find(sentence);
 
     Assert.assertEquals(1, names.length);
     Assert.assertEquals(new Span(0, 1, "person"), names[0]);
@@ -319,8 +319,8 @@ public class NameFinderMETest {
 
   @Test
   public void testDropOverlappingSpans() {
-    Span spans[] = new Span[] {new Span(1, 10), new Span(1,11), new Span(1,11), new Span(5, 15)};
-    Span remainingSpan[] = NameFinderME.dropOverlappingSpans(spans);
+    Span[] spans = new Span[] {new Span(1, 10), new Span(1,11), new Span(1,11), new Span(5, 15)};
+    Span[] remainingSpan = NameFinderME.dropOverlappingSpans(spans);
     Assert.assertEquals(new Span(1, 11), remainingSpan[0]);
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
index cf533f4..911f998 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
@@ -101,7 +101,7 @@ public class NameSampleTest {
   @Test
   public void testNameAtEnd() {
 
-    String sentence[] = new String[] {
+    String[] sentence = new String[] {
         "My",
         "name",
         "is",

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
index 0b3fe2a..c0f2fea 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
@@ -35,7 +35,7 @@ public class RegexNameFinderTest {
   public void testFindSingleTokenPattern() {
 
     Pattern testPattern = Pattern.compile("test");
-    String sentence[] = new String[]{"a", "test", "b", "c"};
+    String[] sentence = new String[]{"a", "test", "b", "c"};
 
 
     Pattern[] patterns = new Pattern[]{testPattern};
@@ -59,7 +59,7 @@ public class RegexNameFinderTest {
   public void testFindTokenizdPattern() {
     Pattern testPattern = Pattern.compile("[0-9]+ year");
 
-    String sentence[] = new String[]{"a", "80", "year", "b", "c"};
+    String[] sentence = new String[]{"a", "80", "year", "b", "c"};
 
     Pattern[] patterns = new Pattern[]{testPattern};
     Map<String, Pattern[]> regexMap = new HashMap<>();
@@ -83,7 +83,7 @@ public class RegexNameFinderTest {
   public void testFindMatchingPatternWithoutMatchingTokenBounds() {
     Pattern testPattern = Pattern.compile("[0-8] year"); // does match "0 year"
 
-    String sentence[] = new String[]{"a", "80", "year", "c"};
+    String[] sentence = new String[]{"a", "80", "year", "c"};
     Pattern[] patterns = new Pattern[]{testPattern};
     Map<String, Pattern[]> regexMap = new HashMap<>();
     String type = "testtype";

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
index f8220c6..5d594ed 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java
@@ -102,7 +102,7 @@ public class ParseTest {
   public void testGetTagNodes() {
     Parse p = Parse.parseParse(PARSE_STRING);
 
-    Parse tags[] = p.getTagNodes();
+    Parse[] tags = p.getTagNodes();
 
     for (Parse node : tags) {
       Assert.assertTrue(node.isPosTag());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
index 2e616b5..51cae2c 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
@@ -63,7 +63,7 @@ public class POSTaggerMETest {
 
     POSTagger tagger = new POSTaggerME(posModel);
 
-    String tags[] = tagger.tag(new String[] {
+    String[] tags = tagger.tag(new String[] {
         "The",
         "driver",
         "got",

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
index 2b9e984..01b96fc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java
@@ -41,7 +41,7 @@ public class WordTagSampleStreamTest {
         new WordTagSampleStream(new CollectionObjectStream<>(sampleString));
 
     POSSample sample = stream.read();
-    String words[] = sample.getSentence();
+    String[] words = sample.getSentence();
 
     Assert.assertEquals("This", words[0]);
     Assert.assertEquals("is", words[1]);
@@ -50,7 +50,7 @@ public class WordTagSampleStreamTest {
     Assert.assertEquals("sentence", words[4]);
     Assert.assertEquals(".", words[5]);
 
-    String tags[] = sample.getTags();
+    String[] tags = sample.getTags();
     Assert.assertEquals("x1", tags[0]);
     Assert.assertEquals("x2", tags[1]);
     Assert.assertEquals("x3", tags[2]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
index c700f3c..6e226bb 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java
@@ -29,7 +29,7 @@ public class NewlineSentenceDetectorTest {
   private static void testSentenceValues(String sentences) {
     NewlineSentenceDetector sd = new NewlineSentenceDetector();
 
-    String results[] = sd.sentDetect(sentences);
+    String[] results = sd.sentDetect(sentences);
 
     Assert.assertEquals(3, results.length);
     Assert.assertEquals("one.", results[0]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
index 78d41cc..43d5829 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
@@ -124,7 +124,7 @@ public class SentenceDetectorMETest {
     Assert.assertEquals(sents[0],"This is a test");
 
     // Test that sentPosDetect also works
-    Span pos[] = sentDetect.sentPosDetect(sampleSentences2);
+    Span[] pos = sentDetect.sentPosDetect(sampleSentences2);
     Assert.assertEquals(pos.length,2);
     probs = sentDetect.getSentenceProbabilities();
     Assert.assertEquals(probs.length,2);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
index fa04457..d6ad672 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java
@@ -31,9 +31,9 @@ public class DictionaryDetokenizerTest {
   @Test
   public void testDetokenizer() {
 
-    String tokens[] = new String[]{".", "!", "(", ")", "\"", "-"};
+    String[] tokens = new String[]{".", "!", "(", ")", "\"", "-"};
 
-    Operation operations[] = new Operation[]{
+    Operation[] operations = new Operation[]{
         Operation.MOVE_LEFT,
         Operation.MOVE_LEFT,
         Operation.MOVE_RIGHT,
@@ -44,7 +44,7 @@ public class DictionaryDetokenizerTest {
     DetokenizationDictionary dict = new DetokenizationDictionary(tokens, operations);
     Detokenizer detokenizer = new DictionaryDetokenizer(dict);
 
-    DetokenizationOperation detokenizeOperations[] =
+    DetokenizationOperation[] detokenizeOperations =
       detokenizer.detokenize(new String[]{"Simple",  "test", ".", "co", "-", "worker"});
 
     Assert.assertEquals(DetokenizationOperation.NO_OPERATION, detokenizeOperations[0]);
@@ -71,7 +71,7 @@ public class DictionaryDetokenizerTest {
 
     Detokenizer detokenizer = createLatinDetokenizer();
 
-    String tokens[] = new String[]{"A", "test", ",", "(", "string", ")", "."};
+    String[] tokens = new String[]{"A", "test", ",", "(", "string", ")", "."};
 
     String sentence = detokenizer.detokenize(tokens, null);
 
@@ -83,7 +83,7 @@ public class DictionaryDetokenizerTest {
 
     Detokenizer detokenizer = createLatinDetokenizer();
 
-    String tokens[] = new String[]{"A", "co", "-", "worker", "helped", "."};
+    String[] tokens = new String[]{"A", "co", "-", "worker", "helped", "."};
 
     String sentence = detokenizer.detokenize(tokens, null);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
index 9ea82bb..84f5f10 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java
@@ -45,7 +45,7 @@ public class TokenSampleStreamTest {
 
     TokenSample tokenSample = sampleTokenStream.read();
 
-    Span tokenSpans[] = tokenSample.getTokenSpans();
+    Span[] tokenSpans = tokenSample.getTokenSpans();
 
     Assert.assertEquals(4, tokenSpans.length);
 
@@ -69,7 +69,7 @@ public class TokenSampleStreamTest {
 
     TokenSample tokenSample = sampleTokenStream.read();
 
-    Span tokenSpans[] = tokenSample.getTokenSpans();
+    Span[] tokenSpans = tokenSample.getTokenSpans();
 
     Assert.assertEquals(4, tokenSpans.length);
 
@@ -101,7 +101,7 @@ public class TokenSampleStreamTest {
 
     TokenSample tokenSample = sampleTokenStream.read();
 
-    Span tokenSpans[] = tokenSample.getTokenSpans();
+    Span[] tokenSpans = tokenSample.getTokenSpans();
 
     Assert.assertEquals(5, tokenSpans.length);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
index 81f3507..1c329bc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java
@@ -45,7 +45,7 @@ public class TokenSampleTest {
 
     Detokenizer detokenizer = DictionaryDetokenizerTest.createLatinDetokenizer();
 
-    String tokens[] = new String[]{
+    String[] tokens = new String[]{
         "start",
         "(", // move right
         ")", // move left

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
index 9acb2e8..5a7a811 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
@@ -40,7 +40,7 @@ public class TokenizerMETest {
 
     TokenizerME tokenizer = new TokenizerME(model);
 
-    String tokens[] = tokenizer.tokenize("test,");
+    String[] tokens = tokenizer.tokenize("test,");
 
     Assert.assertEquals(2, tokens.length);
     Assert.assertEquals("test", tokens[0]);
@@ -52,7 +52,7 @@ public class TokenizerMETest {
     TokenizerModel model = TokenizerTestUtil.createMaxentTokenModel();
 
     TokenizerME tokenizer = new TokenizerME(model);
-    String tokens[] = tokenizer.tokenize("Sounds like it's not properly thought through!");
+    String[] tokens = tokenizer.tokenize("Sounds like it's not properly thought through!");
 
     Assert.assertEquals(9, tokens.length);
     Assert.assertEquals("Sounds", tokens[0]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
index b57b0ec..09e2f44 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
@@ -29,7 +29,7 @@ public class FMeasureTest {
 
   private static final double DELTA = 1.0E-9d;
 
-  private Span gold[] = {
+  private Span[] gold = {
       new Span(8, 9),
       new Span(9, 10),
       new Span(10, 12),
@@ -38,7 +38,7 @@ public class FMeasureTest {
       new Span(15, 16)
   };
 
-  private Span predicted[] = {
+  private Span[] predicted = {
       new Span(14, 15),
       new Span(15, 16),
       new Span(100, 120),
@@ -46,7 +46,7 @@ public class FMeasureTest {
       new Span(220, 230)
   };
 
-  private Span predictedCompletelyDistinct[] = {
+  private Span[] predictedCompletelyDistinct = {
       new Span(100, 120),
       new Span(210, 220),
       new Span(211, 220),
@@ -54,7 +54,7 @@ public class FMeasureTest {
       new Span(220, 230)
   };
 
-  private Span goldToMerge[] = {
+  private Span[] goldToMerge = {
       new Span(8, 9),
       new Span(9, 10),
       new Span(11, 11),
@@ -64,7 +64,7 @@ public class FMeasureTest {
       new Span(18, 19),
   };
 
-  private Span predictedToMerge[] = {
+  private Span[] predictedToMerge = {
       new Span(8, 9),
       new Span(14, 15),
       new Span(15, 16),

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
index 01b1d95..66471ff 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
@@ -29,12 +29,12 @@ import org.junit.Test;
  */
 public class CachedFeatureGeneratorTest {
 
-  private AdaptiveFeatureGenerator identityGenerator[] = new AdaptiveFeatureGenerator[] {
+  private AdaptiveFeatureGenerator[] identityGenerator = new AdaptiveFeatureGenerator[] {
       new IdentityFeatureGenerator()};
 
-  private String testSentence1[];
+  private String[] testSentence1;
 
-  private String testSentence2[];
+  private String[] testSentence2;
 
   private List<String> features;
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
index c9da178..ca23589 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorTest.java
@@ -33,7 +33,7 @@ public class PreviousMapFeatureGeneratorTest {
 
     AdaptiveFeatureGenerator fg = new PreviousMapFeatureGenerator();
 
-    String sentence[] = new String[] {"a", "b", "c"};
+    String[] sentence = new String[] {"a", "b", "c"};
 
     List<String> features = new ArrayList<>();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
index a60ac9b..2edaba7 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
@@ -150,7 +150,7 @@ public final class Chunker extends CasAnnotator_ImplBase {
         context, mTokenType, UimaUtil.POS_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
   }
 
-  private void addChunkAnnotation(CAS tcas, AnnotationFS tokenAnnotations[],
+  private void addChunkAnnotation(CAS tcas, AnnotationFS[] tokenAnnotations,
                                   String tag, int start, int end) {
     AnnotationFS chunk = tcas.createAnnotation(mChunkType,
         tokenAnnotations[start].getBegin(), tokenAnnotations[end - 1].getEnd());
@@ -167,9 +167,9 @@ public final class Chunker extends CasAnnotator_ImplBase {
 
     FSIndex<AnnotationFS> tokenAnnotationIndex = tcas.getAnnotationIndex(mTokenType);
 
-    String tokens[] = new String[tokenAnnotationIndex.size()];
-    String pos[] = new String[tokenAnnotationIndex.size()];
-    AnnotationFS tokenAnnotations[] = new AnnotationFS[tokenAnnotationIndex
+    String[] tokens = new String[tokenAnnotationIndex.size()];
+    String[] pos = new String[tokenAnnotationIndex.size()];
+    AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex
         .size()];
 
     int index = 0;
@@ -184,7 +184,7 @@ public final class Chunker extends CasAnnotator_ImplBase {
           mPosFeature);
     }
 
-    String result[] = mChunker.chunk(tokens, pos);
+    String[] result = mChunker.chunk(tokens, pos);
 
     int start = -1;
     int end = -1;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index 6d76c8f..db9c075 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -82,7 +82,7 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {
 
   public void process(CAS cas) {
 
-    double result[];
+    double[] result;
 
     if (mTokenType != null) {
       // TODO:

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
index 793da86..67efb55 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
@@ -134,7 +134,7 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
     }
   }
 
-  protected void postProcessAnnotations(Span detectedNames[],
+  protected void postProcessAnnotations(Span[] detectedNames,
       AnnotationFS[] nameAnnotations) {
   }
 
@@ -186,7 +186,7 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
       Span[] names  = find(cas,
           sentenceTokenList.toArray(new String[sentenceTokenList.size()]));
 
-      AnnotationFS nameAnnotations[] = new AnnotationFS[names.length];
+      AnnotationFS[] nameAnnotations = new AnnotationFS[names.length];
 
       for (int i = 0; i < names.length; i++) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
index e89d2d6..2a844cf 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
@@ -154,9 +154,9 @@ public final class NameFinder extends AbstractNameFinder {
 
   protected Span[] find(CAS cas, String[] tokens) {
 
-    Span names[] = mNameFinder.find(tokens);
+    Span[] names = mNameFinder.find(tokens);
 
-    double probs[] = mNameFinder.probs();
+    double[] probs = mNameFinder.probs();
 
     for (double prob : probs) {
       documentConfidence.add(prob);
@@ -165,7 +165,7 @@ public final class NameFinder extends AbstractNameFinder {
     return names;
   }
 
-  protected void postProcessAnnotations(Span detectedNames[],
+  protected void postProcessAnnotations(Span[] detectedNames,
                                         AnnotationFS[] nameAnnotations) {
 
     if (probabilityFeature != null) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
index 8e8920f..e3544b1 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
@@ -36,7 +36,7 @@ public final class NumberUtil {
   public static boolean isLanguageSupported(String languageCode) {
     Locale locale = new Locale(languageCode);
 
-    Locale possibleLocales[] = NumberFormat.getAvailableLocales();
+    Locale[] possibleLocales = NumberFormat.getAvailableLocales();
 
     boolean isLocaleSupported = false;
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
index 01c25c3..d147259 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
@@ -199,8 +199,8 @@ public class Parser extends CasAnnotator_ImplBase {
 
   protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) {
 
-    Parse parseChildren[] = parse.getChildren();
-    AnnotationFS parseChildAnnotations[] = new AnnotationFS[parseChildren.length];
+    Parse[] parseChildren = parse.getChildren();
+    AnnotationFS[] parseChildAnnotations = new AnnotationFS[parseChildren.length];
 
     // do this for all children
     for (int i = 0; i < parseChildren.length; i++) {
@@ -244,13 +244,13 @@ public class Parser extends CasAnnotator_ImplBase {
      * @param sentence
      * @param tokens
      */
-    public ParseConverter(String sentence, Span tokens[]) {
+    public ParseConverter(String sentence, Span[] tokens) {
 
       mSentence = sentence;
 
       StringBuilder sentenceStringBuilder = new StringBuilder();
 
-      String tokenList[] = new String[tokens.length];
+      String[] tokenList = new String[tokens.length];
 
       for (int i = 0; i < tokens.length; i++) {
         String tokenString = tokens[i].getCoveredText(sentence).toString();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
index d26c64a..db0f66d 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
@@ -180,7 +180,7 @@ public final class POSTagger extends CasAnnotator_ImplBase {
       final List<String> posTags = Arrays.asList(this.posTagger.tag(
           sentenceTokenList.toArray(new String[sentenceTokenList.size()])));
 
-      double posProbabilities[] = null;
+      double[] posProbabilities = null;
 
       if (this.probabilityFeature != null) {
         posProbabilities = this.posTagger.probs();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
index a905199..ab9095a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
@@ -87,7 +87,7 @@ public abstract class AbstractSentenceDetector extends CasAnnotator_ImplBase {
 
   protected abstract Span[] detectSentences(String text);
 
-  protected void postProcessAnnotations(AnnotationFS sentences[]) {
+  protected void postProcessAnnotations(AnnotationFS[] sentences) {
   }
 
   @Override
@@ -106,7 +106,7 @@ public abstract class AbstractSentenceDetector extends CasAnnotator_ImplBase {
 
       Span[] sentPositions = detectSentences(text);
 
-      AnnotationFS sentences[] = new AnnotationFS[sentPositions.length];
+      AnnotationFS[] sentences = new AnnotationFS[sentPositions.length];
 
       for (int i = 0; i < sentPositions.length; i++) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
index fdb434d..acb5c6b 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
@@ -114,10 +114,10 @@ public final class SentenceDetector extends AbstractSentenceDetector {
   }
 
   @Override
-  protected void postProcessAnnotations(AnnotationFS sentences[]) {
+  protected void postProcessAnnotations(AnnotationFS[] sentences) {
 
     if (probabilityFeature != null) {
-      double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
+      double[] sentenceProbabilities = sentenceDetector.getSentenceProbabilities();
 
       for (int i = 0; i < sentences.length; i++) {
         sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
index ee02dc9..b1f7abc 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
@@ -91,8 +91,8 @@ public abstract class AbstractTokenizer extends CasAnnotator_ImplBase {
         UimaUtil.TOKEN_TYPE_PARAMETER);
   }
 
-  protected void postProcessAnnotations(Span tokens[],
-                                        AnnotationFS tokenAnnotations[]) {
+  protected void postProcessAnnotations(Span[] tokens,
+                                        AnnotationFS[] tokenAnnotations) {
   }
 
   protected abstract Span[] tokenize(CAS cas, AnnotationFS sentence);
@@ -107,13 +107,13 @@ public abstract class AbstractTokenizer extends CasAnnotator_ImplBase {
         UimaUtil.removeAnnotations(cas, sentence, tokenType);
       }
 
-      Span tokenSpans[] = tokenize(cas, sentence);
+      Span[] tokenSpans = tokenize(cas, sentence);
 
       int sentenceOffset = sentence.getBegin();
 
       StringBuilder tokeninzedSentenceLog = new StringBuilder();
 
-      AnnotationFS tokenAnnotations[] = new AnnotationFS[tokenSpans.length];
+      AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenSpans.length];
 
       for (int i = 0; i < tokenSpans.length; i++) {
         tokenAnnotations[i] = cas

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
index 9f72f92..b558241 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
@@ -123,7 +123,7 @@ public final class Tokenizer extends AbstractTokenizer {
                                         AnnotationFS[] tokenAnnotations) {
     // if interest
     if (probabilityFeature != null) {
-      double tokenProbabilties[] = tokenizer.getTokenProbabilities();
+      double[] tokenProbabilties = tokenizer.getTokenProbabilities();
 
       for (int i = 0; i < tokenAnnotations.length; i++) {
         tokenAnnotations[i].setDoubleValue(probabilityFeature,

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
index 338dfec..ecec498 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
@@ -61,7 +61,7 @@ final public class OpennlpUtil {
 
     try (InputStream in = new FileInputStream(inFile)) {
 
-      byte buffer[] = new byte[1024];
+      byte[] buffer = new byte[1024];
       int len;
       while ((len = in.read(buffer)) > 0) {
         bytes.write(buffer, 0, len);


[15/50] [abbrv] opennlp git commit: OPENNLP-983: Make suffix/prefix length configurable

Posted by jo...@apache.org.
OPENNLP-983: Make suffix/prefix length configurable

This closes #121


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1cd2658d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1cd2658d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1cd2658d

Branch: refs/heads/parser_regression
Commit: 1cd2658d0179afcf982229fab9c24da62f944c58
Parents: fc3b12f
Author: jzonthemtn <je...@mtnfog.com>
Authored: Mon Feb 13 07:57:21 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .../tools/util/featuregen/GeneratorFactory.java | 22 ++++-
 .../util/featuregen/PrefixFeatureGenerator.java | 32 +++++--
 .../util/featuregen/SuffixFeatureGenerator.java | 33 +++++--
 .../featuregen/PrefixFeatureGeneratorTest.java  | 92 ++++++++++++++++++++
 .../featuregen/SuffixFeatureGeneratorTest.java  | 92 ++++++++++++++++++++
 5 files changed, 251 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index fa97f43..ef08cfb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -555,7 +555,16 @@ public class GeneratorFactory {
 
     public AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) {
-      return new PrefixFeatureGenerator();
+        
+      String attribute = generatorElement.getAttribute("length");
+        
+      int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH;
+        
+      if (!Objects.equals(attribute, "")) {
+        prefixLength = Integer.parseInt(attribute);
+      }
+        
+      return new PrefixFeatureGenerator(prefixLength);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
@@ -570,7 +579,16 @@ public class GeneratorFactory {
 
     public AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) {
-      return new SuffixFeatureGenerator();
+        
+      String attribute = generatorElement.getAttribute("length");
+        
+      int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH;
+        
+      if (!Objects.equals(attribute, "")) {
+        suffixLength = Integer.parseInt(attribute);
+      }
+        
+      return new SuffixFeatureGenerator(suffixLength);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
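For readers following the change, a minimal sketch (not part of the commit) of what this amounts to at the API level, using only the constructors introduced in the hunks below: when the descriptor's length attribute is absent the factory falls back to the generator's default of 4, otherwise the parsed value is handed straight to the generator.

    import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
    import opennlp.tools.util.featuregen.PrefixFeatureGenerator;
    import opennlp.tools.util.featuregen.SuffixFeatureGenerator;

    // Equivalent programmatic use of the new constructors (sketch only):
    AdaptiveFeatureGenerator defaultPrefixes = new PrefixFeatureGenerator();   // DEFAULT_MAX_LENGTH = 4
    AdaptiveFeatureGenerator shortPrefixes   = new PrefixFeatureGenerator(2);  // prefixes of length 1 and 2
    AdaptiveFeatureGenerator shortSuffixes   = new SuffixFeatureGenerator(2);  // suffixes of length 1 and 2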

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 8cdd48f..04fcd15 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -21,21 +21,35 @@ import java.util.List;
 
 public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private static final int PREFIX_LENGTH = 4;
-
-  private static String[] getPrefixes(String lex) {
-    String[] prefs = new String[PREFIX_LENGTH];
-    for (int li = 0; li < PREFIX_LENGTH; li++) {
-      prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
-    }
-    return prefs;
+  static final int DEFAULT_MAX_LENGTH = 4;
+  
+  private final int prefixLength;
+  
+  public PrefixFeatureGenerator() {
+    prefixLength = DEFAULT_MAX_LENGTH;
+  }
+  
+  public PrefixFeatureGenerator(int prefixLength) {
+    this.prefixLength = prefixLength;
   }
 
+  @Override
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
-    String[] prefs = PrefixFeatureGenerator.getPrefixes(tokens[index]);
+    String[] prefs = getPrefixes(tokens[index]);
     for (String pref : prefs) {
       features.add("pre=" + pref);
     }
   }
+  
+  private String[] getPrefixes(String lex) {
+      
+    int prefixes = Math.min(prefixLength, lex.length());
+    
+    String[] prefs = new String[prefixes];
+    for (int li = 0; li < prefixes; li++) {
+      prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
+    }
+    return prefs;
+  }
 }
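For illustration only, a short sketch (not taken from the commit) of how the bounded loop above changes the output for a token shorter than the configured length: the generator now emits one feature per available character instead of padding the old fixed-size array with repeats of the whole token.

    import java.util.ArrayList;
    import java.util.List;
    import opennlp.tools.util.featuregen.PrefixFeatureGenerator;

    List<String> features = new ArrayList<>();
    new PrefixFeatureGenerator(4).createFeatures(features, new String[] {"is"}, 0, null);
    // features now holds ["pre=i", "pre=is"], i.e. two entries rather than four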

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
index a17fd47..c626fd9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
@@ -21,21 +21,36 @@ import java.util.List;
 
 public class SuffixFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private static final int SUFFIX_LENGTH = 4;
-
-  public static String[] getSuffixes(String lex) {
-    String[] suffs = new String[SUFFIX_LENGTH];
-    for (int li = 0; li < SUFFIX_LENGTH; li++) {
-      suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
-    }
-    return suffs;
+  static final int DEFAULT_MAX_LENGTH = 4;
+    
+  private final int suffixLength;
+  
+  public SuffixFeatureGenerator() {
+    suffixLength = DEFAULT_MAX_LENGTH;
+  }
+  
+  public SuffixFeatureGenerator(int suffixLength) {
+    this.suffixLength = suffixLength;
   }
 
+  @Override
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
-    String[] suffs = SuffixFeatureGenerator.getSuffixes(tokens[index]);
+    String[] suffs = getSuffixes(tokens[index]);
     for (String suff : suffs) {
       features.add("suf=" + suff);
     }
   }
+  
+  private String[] getSuffixes(String lex) {
+      
+    int suffixes = Math.min(suffixLength, lex.length());
+      
+    String[] suffs = new String[suffixes];
+    for (int li = 0; li < suffixes; li++) {
+      suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
+    }
+    return suffs;
+  }
+  
 }
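The suffix generator mirrors the prefix change; under the same assumptions the corresponding sketch is:

    List<String> features = new ArrayList<>();
    new SuffixFeatureGenerator(4).createFeatures(features, new String[] {"is"}, 0, null);
    // features now holds ["suf=s", "suf=is"], again capped at the token length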

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
new file mode 100644
index 0000000..5639174
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PrefixFeatureGeneratorTest {
+
+  private List<String> features;
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void lengthTest1() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 0;
+    int suffixLength = 2;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pre=T", features.get(0));
+    Assert.assertEquals("pre=Th", features.get(1));
+    
+  }
+  
+  @Test
+  public void lengthTest2() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 3;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(5, features.size());
+    Assert.assertEquals("pre=e", features.get(0));
+    Assert.assertEquals("pre=ex", features.get(1));
+    Assert.assertEquals("pre=exa", features.get(2));
+    Assert.assertEquals("pre=exam", features.get(3));
+    Assert.assertEquals("pre=examp", features.get(4));
+    
+  }
+  
+  @Test
+  public void lengthTest3() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 1;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+        
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pre=i", features.get(0));
+    Assert.assertEquals("pre=is", features.get(1));
+    
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
new file mode 100644
index 0000000..fcb23a6
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SuffixFeatureGeneratorTest {
+
+  private List<String> features;
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void lengthTest1() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 0;
+    int suffixLength = 2;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("suf=s", features.get(0));
+    Assert.assertEquals("suf=is", features.get(1));
+    
+  }
+  
+  @Test
+  public void lengthTest2() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 3;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(5, features.size());
+    Assert.assertEquals("suf=e", features.get(0));
+    Assert.assertEquals("suf=le", features.get(1));
+    Assert.assertEquals("suf=ple", features.get(2));
+    Assert.assertEquals("suf=mple", features.get(3));
+    Assert.assertEquals("suf=ample", features.get(4));
+    
+  }
+  
+  @Test
+  public void lengthTest3() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 1;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("suf=s", features.get(0));
+    Assert.assertEquals("suf=is", features.get(1));
+    
+  }
+}


[24/50] [abbrv] opennlp git commit: OPENNLP-995: Add a PR Review Template for contributors

Posted by jo...@apache.org.
OPENNLP-995: Add a PR Review Template for contributors


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/82caa558
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/82caa558
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/82caa558

Branch: refs/heads/parser_regression
Commit: 82caa558d8942c9366af07a5a80bd088aff6c10b
Parents: 6cdca66
Author: smarthi <sm...@apache.org>
Authored: Thu Feb 23 07:46:17 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 .github/CONTRIBUTING.md          | 11 +++++++++++
 .github/PULL_REQUEST_TEMPLATE.md | 27 +++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..577eb16
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# How to contribute to Apache OpenNLP
+
+Thank you for your intention to contribute to the Apache OpenNLP project. As an open-source community, we highly appreciate external contributions to our project.
+
+To make the process smooth for the project *committers* (those who review and accept changes) and *contributors* (those who propose new changes via pull requests), there are a few rules to follow.
+
+## Contribution Guidelines
+
+Please check out the [How to get involved](http://opennlp.apache.org/get-involved.html) page to understand how contributions are made.
+A detailed list of coding standards and guidelines that you should follow can be found at [Apache OpenNLP Code Conventions](http://opennlp.apache.org/code-conventions.html).
+For pull requests, there is a [check list](PULL_REQUEST_TEMPLATE.md) with criteria for acceptable contributions.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/82caa558/.github/PULL_REQUEST_TEMPLATE.md
----------------------------------------------------------------------
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..579e2e0
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,27 @@
+Thank you for contributing to Apache OpenNLP.
+
+In order to streamline the review of the contribution we ask you
+to ensure the following steps have been taken:
+
+### For all changes:
+- [ ] Is there a JIRA ticket associated with this PR? Is it referenced 
+     in the commit message?
+
+- [ ] Does your PR title start with OPENNLP-XXXX where XXXX is the JIRA number you are trying to resolve? Pay particular attention to the hyphen "-" character.
+
+- [ ] Has your PR been rebased against the latest commit within the target branch (typically master)?
+
+- [ ] Is your initial contribution a single, squashed commit?
+
+### For code changes:
+- [ ] Have you ensured that the full suite of tests is executed via mvn clean install at the root opennlp folder?
+- [ ] Have you written or updated unit tests to verify your changes?
+- [ ] If adding new dependencies to the code, are these dependencies licensed in a way that is compatible for inclusion under [ASF 2.0](http://www.apache.org/legal/resolved.html#category-a)? 
+- [ ] If applicable, have you updated the LICENSE file, including the main LICENSE file in opennlp folder?
+- [ ] If applicable, have you updated the NOTICE file, including the main NOTICE file found in opennlp folder?
+
+### For documentation related changes:
+- [ ] Have you ensured that the format looks appropriate for the output in which it is rendered?
+
+### Note:
+Please ensure that once the PR is submitted, you check travis-ci for build issues and submit an update to your PR as soon as possible.


[17/50] [abbrv] opennlp git commit: OPENNLP-990 Fix all array style violations and add a checkstyle rule

Posted by jo...@apache.org.
OPENNLP-990 Fix all array style violations and add a checkstyle rule

This closes #127
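The violations in question are C-style array declarations; every hunk in this commit applies the same mechanical transformation, for example:

    // before (C-style array declaration, now flagged by the new checkstyle rule)
    String tokens[] = tokenizer.tokenize(sentence);

    // after (Java-style array declaration)
    String[] tokens = tokenizer.tokenize(sentence);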


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fdff127b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fdff127b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fdff127b

Branch: refs/heads/parser_regression
Commit: fdff127b38dafb2bbb8df186385bfdb8abc0e9d1
Parents: 1cd2658
Author: Peter Thygesen <pe...@gmail.com>
Authored: Thu Feb 16 12:48:12 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 checkstyle.xml                                  |  1 +
 .../opennlp/bratann/NameFinderAnnService.java   |  2 +-
 .../opennlp/bratann/NameFinderResource.java     |  6 ++---
 .../java/opennlp/morfologik/cmdline/CLI.java    |  2 +-
 .../chunker/ChunkSampleSequenceStream.java      |  4 +--
 .../java/opennlp/tools/chunker/Chunker.java     |  4 +--
 .../tools/cmdline/AbstractConverterTool.java    |  2 +-
 .../opennlp/tools/cmdline/ArgumentParser.java   | 16 +++++------
 .../opennlp/tools/cmdline/BasicCmdLineTool.java |  2 +-
 .../main/java/opennlp/tools/cmdline/CLI.java    |  2 +-
 .../java/opennlp/tools/cmdline/CmdLineUtil.java | 10 +++----
 .../tools/cmdline/EvaluationErrorPrinter.java   | 10 +++----
 .../cmdline/FineGrainedReportListener.java      |  2 +-
 .../tools/cmdline/ObjectStreamFactory.java      |  2 +-
 .../opennlp/tools/cmdline/TypedCmdLineTool.java |  2 +-
 .../tools/cmdline/doccat/DoccatTool.java        |  2 +-
 .../cmdline/entitylinker/EntityLinkerTool.java  |  2 +-
 .../TokenNameFinderCrossValidatorTool.java      |  4 +--
 .../namefind/TokenNameFinderEvaluatorTool.java  |  2 +-
 .../cmdline/namefind/TokenNameFinderTool.java   |  6 ++---
 .../namefind/TokenNameFinderTrainerTool.java    |  8 +++---
 .../tools/cmdline/postag/POSTaggerTool.java     |  2 +-
 .../tokenizer/DictionaryDetokenizerTool.java    |  2 +-
 .../opennlp/tools/dictionary/Dictionary.java    |  2 +-
 .../tools/doccat/DocumentCategorizer.java       |  6 ++---
 .../DocumentCategorizerContextGenerator.java    |  2 +-
 .../doccat/DocumentCategorizerEvaluator.java    |  4 +--
 .../tools/doccat/DocumentCategorizerME.java     |  4 +--
 .../opennlp/tools/doccat/DocumentSample.java    |  4 +--
 .../tools/doccat/DocumentSampleStream.java      |  4 +--
 .../formats/BioNLP2004NameSampleStream.java     |  2 +-
 .../tools/formats/Conll02NameSampleStream.java  |  2 +-
 .../tools/formats/Conll03NameSampleStream.java  |  2 +-
 .../tools/formats/ConllXPOSSampleStream.java    |  2 +-
 .../tools/formats/DirectorySampleStream.java    |  4 +--
 .../tools/formats/EvalitaNameSampleStream.java  |  2 +-
 .../formats/LeipzigDoccatSampleStream.java      |  2 +-
 .../LeipzigDocumentSampleStreamFactory.java     |  4 +--
 .../formats/brat/BratAnnotationStream.java      | 10 +++----
 .../tools/formats/brat/BratDocument.java        |  2 +-
 .../formats/brat/BratNameSampleStream.java      |  2 +-
 .../convert/FileToByteArraySampleStream.java    |  2 +-
 .../convert/FileToStringSampleStream.java       |  2 +-
 .../formats/muc/MucNameContentHandler.java      |  2 +-
 .../ontonotes/OntoNotesNameSampleStream.java    |  2 +-
 .../lemmatizer/LemmaSampleSequenceStream.java   |  6 ++---
 .../opennlp/tools/lemmatizer/Lemmatizer.java    |  2 +-
 .../main/java/opennlp/tools/ml/BeamSearch.java  |  4 +--
 .../java/opennlp/tools/ml/maxent/GISModel.java  |  2 +-
 .../opennlp/tools/ml/model/MaxentModel.java     |  2 +-
 .../SimplePerceptronSequenceTrainer.java        |  4 +--
 .../java/opennlp/tools/namefind/BioCodec.java   |  2 +-
 .../namefind/DefaultNameContextGenerator.java   |  4 +--
 .../tools/namefind/DictionaryNameFinder.java    |  2 +-
 .../tools/namefind/NameFinderEventStream.java   |  2 +-
 .../opennlp/tools/namefind/NameFinderME.java    |  2 +-
 .../java/opennlp/tools/namefind/NameSample.java |  2 +-
 .../namefind/NameSampleSequenceStream.java      |  4 +--
 .../opennlp/tools/namefind/RegexNameFinder.java |  8 +++---
 .../opennlp/tools/namefind/TokenNameFinder.java |  2 +-
 .../namefind/TokenNameFinderCrossValidator.java |  4 +--
 .../namefind/TokenNameFinderEvaluator.java      |  4 +--
 .../tools/namefind/TokenNameFinderFactory.java  |  2 +-
 .../tools/namefind/TokenNameFinderModel.java    |  2 +-
 .../tools/parser/AbstractBottomUpParser.java    |  6 ++---
 .../tools/parser/ChunkContextGenerator.java     |  2 +-
 .../tools/parser/ParserChunkerFactory.java      |  2 +-
 .../parser/ParserChunkerSequenceValidator.java  |  2 +-
 .../opennlp/tools/parser/PosSampleStream.java   |  4 +--
 .../opennlp/tools/postag/POSDictionary.java     |  2 +-
 .../java/opennlp/tools/postag/POSEvaluator.java |  4 +--
 .../java/opennlp/tools/postag/POSSample.java    | 10 +++----
 .../tools/postag/POSSampleEventStream.java      |  6 ++---
 .../tools/postag/POSSampleSequenceStream.java   |  4 +--
 .../sentdetect/DefaultEndOfSentenceScanner.java |  4 +--
 .../sentdetect/SentenceDetectorEvaluator.java   |  6 ++---
 .../tools/sentdetect/SentenceDetectorME.java    |  2 +-
 .../tools/sentdetect/SentenceSample.java        |  2 +-
 .../tokenize/DetokenizationDictionary.java      |  4 +--
 .../opennlp/tools/tokenize/Detokenizer.java     |  4 +--
 .../tools/tokenize/DictionaryDetokenizer.java   |  6 ++---
 .../tools/tokenize/TokSpanEventStream.java      |  2 +-
 .../opennlp/tools/tokenize/TokenSample.java     |  6 ++---
 .../tools/tokenize/TokenizerEvaluator.java      |  2 +-
 .../opennlp/tools/tokenize/TokenizerStream.java |  2 +-
 .../java/opennlp/tools/util/SequenceCodec.java  |  2 +-
 .../java/opennlp/tools/util/StringList.java     |  2 +-
 .../java/opennlp/tools/util/StringUtil.java     |  4 +--
 .../DocumentBeginFeatureGenerator.java          |  2 +-
 .../tools/util/featuregen/InSpanGenerator.java  |  4 +--
 .../util/featuregen/WordClusterDictionary.java  |  2 +-
 .../opennlp/tools/util/model/ModelUtil.java     |  2 +-
 .../opennlp/tools/chunker/ChunkerMETest.java    |  2 +-
 .../tools/cmdline/ArgumentParserTest.java       |  6 ++---
 .../tools/doccat/DocumentCategorizerMETest.java |  4 +--
 .../tools/doccat/DocumentCategorizerNBTest.java |  4 +--
 .../formats/ConllXPOSSampleStreamTest.java      |  8 +++---
 .../ConstitParseSampleStreamTest.java           |  4 +--
 .../java/opennlp/tools/ml/BeamSearchTest.java   | 22 +++++++--------
 .../namefind/DictionaryNameFinderTest.java      | 28 ++++++++++----------
 .../tools/namefind/NameFinderMETest.java        | 12 ++++-----
 .../opennlp/tools/namefind/NameSampleTest.java  |  2 +-
 .../tools/namefind/RegexNameFinderTest.java     |  6 ++---
 .../java/opennlp/tools/parser/ParseTest.java    |  2 +-
 .../opennlp/tools/postag/POSTaggerMETest.java   |  2 +-
 .../tools/postag/WordTagSampleStreamTest.java   |  4 +--
 .../sentdetect/NewlineSentenceDetectorTest.java |  2 +-
 .../sentdetect/SentenceDetectorMETest.java      |  2 +-
 .../tokenize/DictionaryDetokenizerTest.java     | 10 +++----
 .../tools/tokenize/TokenSampleStreamTest.java   |  6 ++---
 .../opennlp/tools/tokenize/TokenSampleTest.java |  2 +-
 .../opennlp/tools/tokenize/TokenizerMETest.java |  4 +--
 .../opennlp/tools/util/eval/FMeasureTest.java   | 10 +++----
 .../featuregen/CachedFeatureGeneratorTest.java  |  6 ++---
 .../PreviousMapFeatureGeneratorTest.java        |  2 +-
 .../main/java/opennlp/uima/chunker/Chunker.java | 10 +++----
 .../doccat/AbstractDocumentCategorizer.java     |  2 +-
 .../uima/namefind/AbstractNameFinder.java       |  4 +--
 .../java/opennlp/uima/namefind/NameFinder.java  |  6 ++---
 .../opennlp/uima/normalizer/NumberUtil.java     |  2 +-
 .../main/java/opennlp/uima/parser/Parser.java   |  8 +++---
 .../java/opennlp/uima/postag/POSTagger.java     |  2 +-
 .../sentdetect/AbstractSentenceDetector.java    |  4 +--
 .../uima/sentdetect/SentenceDetector.java       |  4 +--
 .../uima/tokenize/AbstractTokenizer.java        |  8 +++---
 .../java/opennlp/uima/tokenize/Tokenizer.java   |  2 +-
 .../java/opennlp/uima/util/OpennlpUtil.java     |  2 +-
 127 files changed, 264 insertions(+), 263 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/checkstyle.xml
----------------------------------------------------------------------
diff --git a/checkstyle.xml b/checkstyle.xml
index ab65feb..1bfe788 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -126,5 +126,6 @@
                 value="STANDARD_JAVA_PACKAGE###THIRD_PARTY_PACKAGE###SPECIAL_IMPORTS###STATIC"/>
     </module>
     <module name="EqualsHashCode"/>
+    <module name="ArrayTypeStyle"/>
   </module>
 </module>

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
index 5519866..a6085e7 100644
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
@@ -44,7 +44,7 @@ public class NameFinderAnnService {
 
   public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();
   public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
-  public static TokenNameFinder nameFinders[];
+  public static TokenNameFinder[] nameFinders;
 
   public static void main(String[] args) throws Exception {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
index bd19bca..468f898 100644
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
@@ -39,7 +39,7 @@ public class NameFinderResource {
 
   private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
   private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
-  private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
+  private TokenNameFinder[] nameFinders = NameFinderAnnService.nameFinders;
 
   private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, int endOffset) {
     for (int i = beginOffset; i < endOffset; i++) {
@@ -66,10 +66,10 @@ public class NameFinderResource {
       // offset of sentence gets lost here!
       Span[] tokenSpans = tokenizer.tokenizePos(sentenceText);
 
-      String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
+      String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
 
       for (TokenNameFinder nameFinder : nameFinders) {
-        Span names[] = nameFinder.find(tokens);
+        Span[] names = nameFinder.find(tokens);
 
         for (Span name : names) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
index d659435..664c03a 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -101,7 +101,7 @@ public final class CLI {
       System.exit(0);
     }
 
-    String toolArguments[] = new String[args.length - 1];
+    String[] toolArguments = new String[args.length - 1];
     System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
 
     String toolName = args[0];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
index 9898bd4..eb42aa9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
@@ -41,8 +41,8 @@ public class ChunkSampleSequenceStream implements SequenceStream {
     ChunkSample sample = samples.read();
 
     if (sample != null) {
-      String sentence[] = sample.getSentence();
-      String tags[] = sample.getTags();
+      String[] sentence = sample.getSentence();
+      String[] tags = sample.getTags();
       Event[] events = new Event[sentence.length];
 
       for (int i = 0; i < sentence.length; i++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
index c496d12..5bdec75 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
@@ -33,7 +33,7 @@ public interface Chunker {
    *
    * @return an array of chunk tags for each token in the sequence.
    */
-  String[] chunk(String[] toks, String tags[]);
+  String[] chunk(String[] toks, String[] tags);
 
   /**
    * Generates tagged chunk spans for the given sequence returning the result in a span array.
@@ -43,7 +43,7 @@ public interface Chunker {
    *
    * @return an array of spans with chunk tags for each chunk in the sequence.
    */
-  Span[] chunkAsSpans(String[] toks, String tags[]);
+  Span[] chunkAsSpans(String[] toks, String[] tags);
 
   /**
    * Returns the top k chunk sequences for the specified sentence with the specified pos-tags

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
index a6b81ea..4c95b75 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java
@@ -89,7 +89,7 @@ public abstract class AbstractConverterTool<T> extends TypedCmdLineTool<T> {
       format = args[0];
       ObjectStreamFactory<T> streamFactory = getStreamFactory(format);
 
-      String formatArgs[] = new String[args.length - 1];
+      String[] formatArgs = new String[args.length - 1];
       System.arraycopy(args, 1, formatArgs, 0, formatArgs.length);
 
       String helpString = createHelpString(format, ArgumentParser.createUsage(streamFactory.getParameters()));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
index 631bc34..8243560 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java
@@ -169,7 +169,7 @@ public class ArgumentParser {
 
         // all checks should also be performed for super interfaces
 
-        Method methods[] = proxyInterface.getMethods();
+        Method[] methods = proxyInterface.getMethods();
 
         if (methods.length == 0)
           throw new IllegalArgumentException("proxy interface must at least declare one method!");
@@ -204,7 +204,7 @@ public class ArgumentParser {
 
   private static String methodNameToParameter(String methodName) {
     // remove get from method name
-    char parameterNameChars[] = methodName.toCharArray();
+    char[] parameterNameChars = methodName.toCharArray();
 
     // name length is checked to be at least 4 prior
     parameterNameChars[3] = Character.toLowerCase(parameterNameChars[3]);
@@ -385,7 +385,7 @@ public class ArgumentParser {
    * @return true, if arguments are valid
    */
   @SuppressWarnings({"unchecked"})
-  public static <T> boolean validateArguments(String args[], Class<T> argProxyInterface) {
+  public static <T> boolean validateArguments(String[] args, Class<T> argProxyInterface) {
     return validateArguments(args, new Class[]{argProxyInterface});
   }
 
@@ -398,7 +398,7 @@ public class ArgumentParser {
    * @param argProxyInterfaces interfaces with parameters description
    * @return true, if arguments are valid
    */
-  public static boolean validateArguments(String args[], Class<?>... argProxyInterfaces) {
+  public static boolean validateArguments(String[] args, Class<?>... argProxyInterfaces) {
     return null == validateArgumentsLoudly(args, argProxyInterfaces);
   }
 
@@ -409,7 +409,7 @@ public class ArgumentParser {
    * @param argProxyInterface interface with parameters description
    * @return null, if arguments are valid or error message otherwise
    */
-  public static String validateArgumentsLoudly(String args[], Class<?> argProxyInterface) {
+  public static String validateArgumentsLoudly(String[] args, Class<?> argProxyInterface) {
     return validateArgumentsLoudly(args, new Class[]{argProxyInterface});
   }
 
@@ -420,7 +420,7 @@ public class ArgumentParser {
    * @param argProxyInterfaces interfaces with parameters description
    * @return null, if arguments are valid or error message otherwise
    */
-  public static String validateArgumentsLoudly(String args[], Class<?>... argProxyInterfaces) {
+  public static String validateArgumentsLoudly(String[] args, Class<?>... argProxyInterfaces) {
     // number of parameters must be always be even
     if (args.length % 2 != 0) {
       return "Number of parameters must be always be even";
@@ -478,7 +478,7 @@ public class ArgumentParser {
    *     if the proxy interface is not compatible.
    */
   @SuppressWarnings("unchecked")
-  public static <T> T parse(String args[], Class<T> argProxyInterface) {
+  public static <T> T parse(String[] args, Class<T> argProxyInterface) {
 
     checkProxyInterfaces(argProxyInterface);
 
@@ -533,7 +533,7 @@ public class ArgumentParser {
    * @param <T> T
    * @return arguments pertaining to argProxyInterface
    */
-  public static <T> String[] filter(String args[], Class<T> argProxyInterface) {
+  public static <T> String[] filter(String[] args, Class<T> argProxyInterface) {
     ArrayList<String> parameters = new ArrayList<>(args.length);
 
     for (Method method : argProxyInterface.getMethods()) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
index abe73b4..f320986 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
@@ -29,5 +29,5 @@ public abstract class BasicCmdLineTool extends CmdLineTool {
    *
    * @param args arguments
    */
-  public abstract void run(String args[]);
+  public abstract void run(String[] args);
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
index 9385a18..b575f71 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -210,7 +210,7 @@ public final class CLI {
     }
 
     final long startTime = System.currentTimeMillis();
-    String toolArguments[] = new String[args.length - 1];
+    String[] toolArguments = new String[args.length - 1];
     System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
 
     String toolName = args[0];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
index 7ea2a0b..1dfd7bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java
@@ -204,7 +204,7 @@ public final class CmdLineUtil {
    * @param args arguments
    * @return the index of the parameter in the arguments, or -1 if the parameter is not found
    */
-  public static int getParameterIndex(String param, String args[]) {
+  public static int getParameterIndex(String param, String[] args) {
     for (int i = 0; i < args.length; i++) {
       if (args[i].startsWith("-") && args[i].equals(param)) {
         return i;
@@ -221,7 +221,7 @@ public final class CmdLineUtil {
    * @param args arguments
    * @return parameter value
    */
-  public static String getParameter(String param, String args[]) {
+  public static String getParameter(String param, String[] args) {
     int i = getParameterIndex(param, args);
     if (-1 < i) {
       i++;
@@ -240,7 +240,7 @@ public final class CmdLineUtil {
    * @param args arguments
    * @return parameter value
    */
-  public static Integer getIntParameter(String param, String args[]) {
+  public static Integer getIntParameter(String param, String[] args) {
     String value = getParameter(param, args);
 
     try {
@@ -261,7 +261,7 @@ public final class CmdLineUtil {
    * @param args arguments
    * @return parameter value
    */
-  public static Double getDoubleParameter(String param, String args[]) {
+  public static Double getDoubleParameter(String param, String[] args) {
     String value = getParameter(param, args);
 
     try {
@@ -286,7 +286,7 @@ public final class CmdLineUtil {
     }
   }
 
-  public static boolean containsParam(String param, String args[]) {
+  public static boolean containsParam(String param, String[] args) {
     for (String arg : args) {
       if (arg.equals(param)) {
         return true;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
index f8a0d91..8ae25e6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java
@@ -38,7 +38,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
   }
 
   // for the sentence detector
-  protected void printError(Span references[], Span predictions[],
+  protected void printError(Span[] references, Span[] predictions,
       T referenceSample, T predictedSample, String sentence) {
     List<Span> falseNegatives = new ArrayList<>();
     List<Span> falsePositives = new ArrayList<>();
@@ -55,7 +55,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
   }
 
   // for namefinder, chunker...
-  protected void printError(String id, Span references[], Span predictions[],
+  protected void printError(String id, Span[] references, Span[] predictions,
       T referenceSample, T predictedSample, String[] sentenceTokens) {
     List<Span> falseNegatives = new ArrayList<>();
     List<Span> falsePositives = new ArrayList<>();
@@ -75,13 +75,13 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
     }
   }
 
-  protected void printError(Span references[], Span predictions[],
+  protected void printError(Span[] references, Span[] predictions,
       T referenceSample, T predictedSample, String[] sentenceTokens) {
     printError(null, references, predictions, referenceSample, predictedSample, sentenceTokens);
   }
 
   // for pos tagger
-  protected void printError(String references[], String predictions[],
+  protected void printError(String[] references, String[] predictions,
       T referenceSample, T predictedSample, String[] sentenceTokens) {
     List<String> filteredDoc = new ArrayList<>();
     List<String> filteredRefs = new ArrayList<>();
@@ -213,7 +213,7 @@ public abstract class EvaluationErrorPrinter<T> implements EvaluationMonitor<T>
    * @param falsePositives
    *          [out] the false positives list
    */
-  private void findErrors(Span references[], Span predictions[],
+  private void findErrors(Span[] references, Span[] predictions,
       List<Span> falseNegatives, List<Span> falsePositives) {
 
     falseNegatives.addAll(Arrays.asList(references));

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
index 03ce489..714561a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/FineGrainedReportListener.java
@@ -60,7 +60,7 @@ public abstract class FineGrainedReportListener {
 
   private static String generateAlphaLabel(int index) {
 
-    char labelChars[] = new char[3];
+    char[] labelChars = new char[3];
     int i;
 
     for (i = 2; i >= 0; i--) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
index 8bc6b95..4f48bbf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
@@ -36,5 +36,5 @@ public interface ObjectStreamFactory<T> {
    * @param args arguments
    * @return ObjectStream instance
    */
-  ObjectStream<T> create(String args[]);
+  ObjectStream<T> create(String[] args);
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
index bf4b381..85ab2cb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java
@@ -116,7 +116,7 @@ public abstract class TypedCmdLineTool<T>
    * @param format format to work with
    * @param args command line arguments
    */
-  public abstract void run(String format, String args[]);
+  public abstract void run(String format, String[] args);
 
   /**
    * Retrieves a description on how to use the tool.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
index 49a55d3..a01d354 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
@@ -70,7 +70,7 @@ public class DoccatTool extends BasicCmdLineTool {
         while ((document = documentStream.read()) != null) {
           String[] tokens = model.getFactory().getTokenizer().tokenize(document);
 
-          double prob[] = doccat.categorize(tokens);
+          double[] prob = doccat.categorize(tokens);
           String category = doccat.getBestCategory(prob);
 
           DocumentSample sample = new DocumentSample(category, tokens);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
index f248a2c..7f2d334 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
@@ -93,7 +93,7 @@ public class EntityLinkerTool extends BasicCmdLineTool {
             // Run entity linker ... and output result ...
 
             StringBuilder text = new StringBuilder();
-            Span sentences[] = new Span[document.size()];
+            Span[] sentences = new Span[document.size()];
             Span[][] tokensBySentence = new Span[document.size()][];
             Span[][] namesBySentence = new Span[document.size()][];
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 153d6f7..0ee3738 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -68,14 +68,14 @@ public final class TokenNameFinderCrossValidatorTool
       mlParams = new TrainingParameters();
     }
 
-    byte featureGeneratorBytes[] =
+    byte[] featureGeneratorBytes =
         TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
 
     Map<String, Object> resources =
         TokenNameFinderTrainerTool.loadResources(params.getResources(), params.getFeaturegen());
 
     if (params.getNameTypes() != null) {
-      String nameTypes[] = params.getNameTypes().split(",");
+      String[] nameTypes = params.getNameTypes().split(",");
       sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
     }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
index d00e254..b3d5bba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java
@@ -96,7 +96,7 @@ public final class TokenNameFinderEvaluatorTool
     }
 
     if (params.getNameTypes() != null) {
-      String nameTypes[] = params.getNameTypes().split(",");
+      String[] nameTypes = params.getNameTypes().split(",");
       sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
     }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 2476005..59b2f3a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -53,7 +53,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
       System.out.println(getHelp());
     } else {
 
-      NameFinderME nameFinders[] = new NameFinderME[args.length];
+      NameFinderME[] nameFinders = new NameFinderME[args.length];
 
       for (int i = 0; i < nameFinders.length; i++) {
         TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
@@ -71,7 +71,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
             new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
         String line;
         while ((line = untokenizedLineStream.read()) != null) {
-          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+          String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
 
           // A new line indicates a new document,
           // adaptive data must be cleared for a new document
@@ -90,7 +90,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
 
           // Simple way to drop intersecting spans, otherwise the
           // NameSample is invalid
-          Span reducedNames[] = NameFinderME.dropOverlappingSpans(
+          Span[] reducedNames = NameFinderME.dropOverlappingSpans(
                   names.toArray(new Span[names.size()]));
 
           NameSample nameSample = new NameSample(whitespaceTokenizerLine,

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index fb73506..5bb18d2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -68,7 +68,7 @@ public final class TokenNameFinderTrainerTool
   }
 
   static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
-    byte featureGeneratorBytes[] = null;
+    byte[] featureGeneratorBytes = null;
     // load descriptor file into memory
     if (featureGenDescriptorFile != null) {
 
@@ -118,7 +118,7 @@ public final class TokenNameFinderTrainerTool
         }
       }
 
-      File resourceFiles[] = resourcePath.listFiles();
+      File[] resourceFiles = resourcePath.listFiles();
 
       for (File resourceFile : resourceFiles) {
         String resourceName = resourceFile.getName();
@@ -172,7 +172,7 @@ public final class TokenNameFinderTrainerTool
 
     File modelOutFile = params.getModel();
 
-    byte featureGeneratorBytes[] = openFeatureGeneratorBytes(params.getFeaturegen());
+    byte[] featureGeneratorBytes = openFeatureGeneratorBytes(params.getFeaturegen());
 
 
     // TODO: Support Custom resources:
@@ -184,7 +184,7 @@ public final class TokenNameFinderTrainerTool
     CmdLineUtil.checkOutputFile("name finder model", modelOutFile);
 
     if (params.getNameTypes() != null) {
-      String nameTypes[] = params.getNameTypes().split(",");
+      String[] nameTypes = params.getNameTypes().split(",");
       sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
     }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
index dc93226..3f1959e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
@@ -63,7 +63,7 @@ public final class POSTaggerTool extends BasicCmdLineTool {
         String line;
         while ((line = lineStream.read()) != null) {
 
-          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+          String[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
           String[] tags = tagger.tag(whitespaceTokenizerLine);
 
           POSSample sample = new POSSample(whitespaceTokenizerLine, tags);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
index 57176ae..30f408b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
@@ -58,7 +58,7 @@ public final class DictionaryDetokenizerTool extends BasicCmdLineTool {
         while ((tokenizedLine = tokenizedLineStream.read()) != null) {
 
           // white space tokenize line
-          String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);
+          String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);
 
           System.out.println(detokenizer.detokenize(tokens, null));
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index 3fd8986..10b9f37 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -275,7 +275,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
     while ((line = lineReader.readLine()) != null) {
       StringTokenizer whiteSpaceTokenizer = new StringTokenizer(line, " ");
 
-      String tokens[] = new String[whiteSpaceTokenizer.countTokens()];
+      String[] tokens = new String[whiteSpaceTokenizer.countTokens()];
 
       if (tokens.length > 0) {
         int tokenIndex = 0;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
index c8ad3c3..88bf8f9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
@@ -32,7 +32,7 @@ public interface DocumentCategorizer {
    * @param text the tokens of text to categorize
    * @return per category probabilities
    */
-  double[] categorize(String text[]);
+  double[] categorize(String[] text);
 
   /**
    * Categorizes the given text, provided in separate tokens.
@@ -43,7 +43,7 @@ public interface DocumentCategorizer {
    * @deprecated will be removed after 1.7.1 release. Don't use it.
    */
   @Deprecated
-  double[] categorize(String text[], Map<String, Object> extraInformation);
+  double[] categorize(String[] text, Map<String, Object> extraInformation);
 
   /**
    * get the best category from previously generated outcome probabilities
@@ -101,7 +101,7 @@ public interface DocumentCategorizer {
    * @param results the probabilities of each category
    * @return the name of the outcome
    */
-  String getAllResults(double results[]);
+  String getAllResults(double[] results);
 
   /**
    * Returns a map in which the key is the category name and the value is the score

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
index b1da3e3..e12f16b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
@@ -32,7 +32,7 @@ class DocumentCategorizerContextGenerator {
     mFeatureGenerators = featureGenerators;
   }
 
-  public String[] getContext(String text[], Map<String, Object> extraInformation) {
+  public String[] getContext(String[] text, Map<String, Object> extraInformation) {
 
     Collection<String> context = new LinkedList<>();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
index d2307e3..63e0768 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
@@ -57,9 +57,9 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
    */
   public DocumentSample processSample(DocumentSample sample) {
 
-    String document[] = sample.getText();
+    String[] document = sample.getText();
 
-    double probs[] = categorizer.categorize(document, sample.getExtraInformation());
+    double[] probs = categorizer.categorize(document, sample.getExtraInformation());
 
     String cat = categorizer.getBestCategory(probs);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index 33151d9..e743b9d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -85,7 +85,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
    * Categorizes the given text.
    * @param text the text to categorize
    */
-  public double[] categorize(String text[]) {
+  public double[] categorize(String[] text) {
     return this.categorize(text, Collections.emptyMap());
   }
 
@@ -225,7 +225,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
     return model.getMaxentModel().getNumOutcomes();
   }
 
-  public String getAllResults(double results[]) {
+  public String getAllResults(double[] results) {
     return model.getMaxentModel().getAllOutcomes(results);
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
index f521738..3d107fa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
@@ -39,11 +39,11 @@ public class DocumentSample {
     this(category, WhitespaceTokenizer.INSTANCE.tokenize(text));
   }
 
-  public DocumentSample(String category, String text[]) {
+  public DocumentSample(String category, String[] text) {
     this(category, text, null);
   }
 
-  public DocumentSample(String category, String text[], Map<String, Object> extraInformation) {
+  public DocumentSample(String category, String[] text, Map<String, Object> extraInformation) {
     Objects.requireNonNull(text, "text must not be null");
 
     this.category = Objects.requireNonNull(category, "category must not be null");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
index 13d9184..9054eb7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
@@ -45,13 +45,13 @@ public class DocumentSampleStream extends FilterObjectStream<String, DocumentSam
     if (sampleString != null) {
 
       // Whitespace tokenize entire string
-      String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
+      String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
 
       DocumentSample sample;
 
       if (tokens.length > 1) {
         String category = tokens[0];
-        String docTokens[] = new String[tokens.length - 1];
+        String[] docTokens = new String[tokens.length - 1];
         System.arraycopy(tokens, 1, docTokens, 0, tokens.length - 1);
 
         sample = new DocumentSample(category, docTokens);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
index b3ac623..ff4ad34 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
@@ -87,7 +87,7 @@ public class BioNLP2004NameSampleStream implements ObjectStream<NameSample> {
       if (line.contains("ABSTRACT TRUNCATED"))
         continue;
 
-      String fields[] = line.split("\t");
+      String[] fields = line.split("\t");
 
       if (fields.length == 2) {
         sentence.add(fields[0]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
index cd68148..f3c2a81 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
@@ -125,7 +125,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> {
         continue;
       }
 
-      String fields[] = line.split(" ");
+      String[] fields = line.split(" ");
 
       if (fields.length == 3) {
         sentence.add(fields[0]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
index 6892605..5f1c082 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
@@ -93,7 +93,7 @@ public class Conll03NameSampleStream implements ObjectStream<NameSample> {
         continue;
       }
 
-      String fields[] = line.split(" ");
+      String[] fields = line.split(" ");
 
       // For English: WORD  POS-TAG SC-TAG NE-TAG
       if (LANGUAGE.EN.equals(lang) && fields.length == 4) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
index 74ae62a..9525ab6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
@@ -76,7 +76,7 @@ public class ConllXPOSSampleStream extends FilterObjectStream<String, POSSample>
 
         final int minNumberOfFields = 5;
 
-        String parts[] = line.split("\t");
+        String[] parts = line.split("\t");
 
         if (parts.length >= minNumberOfFields) {
           tokens.add(parts[1]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
index 931cb55..3a5621a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
@@ -43,7 +43,7 @@ public class DirectorySampleStream implements ObjectStream<File> {
 
   private Stack<File> textFiles = new Stack<>();
 
-  public DirectorySampleStream(File dirs[], FileFilter fileFilter, boolean recursive) {
+  public DirectorySampleStream(File[] dirs, FileFilter fileFilter, boolean recursive) {
     this.fileFilter = fileFilter;
     isRecursiveScan = recursive;
 
@@ -73,7 +73,7 @@ public class DirectorySampleStream implements ObjectStream<File> {
     while (textFiles.isEmpty() && !directories.isEmpty()) {
       File dir = directories.pop();
 
-      File files[];
+      File[] files;
 
       if (fileFilter != null) {
         files = dir.listFiles(fileFilter);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
index d09c0b3..531a50f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
@@ -138,7 +138,7 @@ public class EvalitaNameSampleStream implements ObjectStream<NameSample> {
         continue;
       }
 
-      String fields[] = line.split(" ");
+      String[] fields = line.split(" ");
 
       // For Italian: WORD  POS-TAG SC-TAG NE-TAG
       if (LANGUAGE.IT.equals(lang) && fields.length == 4) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
index 321f7c4..1ca0484 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
@@ -87,7 +87,7 @@ public class LeipzigDoccatSampleStream extends
     String line;
     while (count < sentencesPerDocument && (line = samples.read()) != null) {
 
-      String tokens[] = tokenizer.tokenize(line);
+      String[] tokens = tokenizer.tokenize(line);
 
       if (tokens.length == 0) {
         throw new IOException("Empty lines are not allowed!");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
index f0aea5e..bd2453b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
@@ -51,7 +51,7 @@ public class LeipzigDocumentSampleStreamFactory
     Parameters params = ArgumentParser.parse(args, Parameters.class);
     File sentencesFileDir = params.getSentencesDir();
 
-    File sentencesFiles[] = sentencesFileDir.listFiles(new FilenameFilter() {
+    File[] sentencesFiles = sentencesFileDir.listFiles(new FilenameFilter() {
       @Override
       public boolean accept(File dir, String name) {
         return name.contains("sentences") && name.endsWith(".txt");
@@ -59,7 +59,7 @@ public class LeipzigDocumentSampleStreamFactory
     });
 
     @SuppressWarnings("unchecked")
-    ObjectStream<DocumentSample> sampleStreams[] =
+    ObjectStream<DocumentSample>[] sampleStreams =
         new ObjectStream[sentencesFiles.length];
 
     for (int i = 0; i < sentencesFiles.length; i++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
index 76e3d0f..efeddba 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
@@ -40,7 +40,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
     static final int ID_OFFSET = 0;
     static final int TYPE_OFFSET = 1;
 
-    BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+    BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
       return null;
     }
 
@@ -60,7 +60,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
     private static final int END_OFFSET = 3;
 
     @Override
-    BratAnnotation parse(Span values[], CharSequence line) throws IOException {
+    BratAnnotation parse(Span[] values, CharSequence line) throws IOException {
 
       if (values.length > 4) {
         String type = values[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
@@ -111,7 +111,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
     }
 
     @Override
-    BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+    BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
       return new RelationAnnotation(tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString(),
           tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString(),
           parseArg(tokens[ARG1_OFFSET].getCoveredText(line).toString()),
@@ -122,7 +122,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
   static class EventAnnotationParser extends BratAnnotationParser {
 
     @Override
-    BratAnnotation parse(Span tokens[], CharSequence line) throws IOException {
+    BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
 
       String[] typeParts = tokens[TYPE_OFFSET].getCoveredText(line).toString().split(":");
 
@@ -194,7 +194,7 @@ public class BratAnnotationStream implements ObjectStream<BratAnnotation> {
     String line = reader.readLine();
 
     if (line != null) {
-      Span tokens[] = WhitespaceTokenizer.INSTANCE.tokenizePos(line);
+      Span[] tokens = WhitespaceTokenizer.INSTANCE.tokenizePos(line);
 
       if (tokens.length > 2) {
         String annId = tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
index 16c9deb..1b9aee2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocument.java
@@ -78,7 +78,7 @@ public class BratDocument {
 
     StringBuilder text = new StringBuilder();
 
-    char cbuf[] = new char[1024];
+    char[] cbuf = new char[1024];
 
     int len;
     while ((len = txtReader.read(cbuf)) > 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
index a569992..5a96d2d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
@@ -111,7 +111,7 @@ public class BratNameSampleStream extends SegmenterObjectStream<BratDocument, Na
       String sentenceText = sentence.getCoveredText(
           sample.getText()).toString();
 
-      Span tokens[] = tokenizer.tokenizePos(sentenceText);
+      Span[] tokens = tokenizer.tokenizePos(sentenceText);
 
       // Note:
       // A begin and end token index can be identical, but map to different

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
index 0367b95..b7dedbb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToByteArraySampleStream.java
@@ -38,7 +38,7 @@ public class FileToByteArraySampleStream extends FilterObjectStream<File, byte[]
     ByteArrayOutputStream bytes = new ByteArrayOutputStream();
 
     try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
-      byte buffer[] = new byte[1024];
+      byte[] buffer = new byte[1024];
       int length;
       while ((length = in.read(buffer, 0, buffer.length)) > 0) {
         bytes.write(buffer, 0, length);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
index 3ca641c..3b0476a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java
@@ -45,7 +45,7 @@ public class FileToStringSampleStream extends FilterObjectStream<File, String> {
     StringBuilder text = new StringBuilder();
 
     try {
-      char buffer[] = new char[1024];
+      char[] buffer = new char[1024];
       int length;
       while ((length = in.read(buffer, 0, buffer.length)) > 0) {
         text.append(buffer, 0, length);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
index 4d6d3a4..e25d674 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java
@@ -106,7 +106,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler {
   @Override
   public void characters(CharSequence chars) {
     if (isInsideContentElement) {
-      String tokens [] = tokenizer.tokenize(chars.toString());
+      String[] tokens = tokenizer.tokenize(chars.toString());
       text.addAll(Arrays.asList(tokens));
     }
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
index 41e5aa9..af2b5c8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
@@ -105,7 +105,7 @@ public class OntoNotesNameSampleStream extends
             break;
           }
 
-          String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
+          String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
 
           List<Span> entities = new LinkedList<>();
           List<String> cleanedTokens = new ArrayList<>(tokens.length);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
index 7056538..a4d5c8c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
@@ -41,9 +41,9 @@ public class LemmaSampleSequenceStream implements SequenceStream {
     LemmaSample sample = samples.read();
 
     if (sample != null) {
-      String sentence[] = sample.getTokens();
-      String tags[] = sample.getTags();
-      String preds[] = sample.getLemmas();
+      String[] sentence = sample.getTokens();
+      String[] tags = sample.getTags();
+      String[] preds = sample.getLemmas();
       Event[] events = new Event[sentence.length];
 
       for (int i = 0; i < sentence.length; i++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index ddcaa6a..f21f9e3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -30,6 +30,6 @@ public interface Lemmatizer {
    *
    * @return an array of lemma classes for each token in the sequence.
    */
-  String[] lemmatize(String[] toks, String tags[]);
+  String[] lemmatize(String[] toks, String[] tags);
 
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
index 4ce8b7e..949a408 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
@@ -171,7 +171,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
 
   public Sequence bestSequence(T[] sequence, Object[] additionalContext,
       BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
-    Sequence sequences[] =  bestSequences(1, sequence, additionalContext, cg, validator);
+    Sequence[] sequences =  bestSequences(1, sequence, additionalContext, cg, validator);
 
     if (sequences.length > 0)
       return sequences[0];
@@ -181,7 +181,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
 
   @Override
   public String[] getOutcomes() {
-    String outcomes[] = new String[model.getNumOutcomes()];
+    String[] outcomes = new String[model.getNumOutcomes()];
 
     for (int i = 0; i < model.getNumOutcomes(); i++) {
       outcomes[i] = model.getOutcome(i);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
index 14c7fa3..b8b830e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
@@ -156,7 +156,7 @@ public final class GISModel extends AbstractModel {
   public static double[] eval(int[] context, float[] values, double[] prior,
       EvalParameters model) {
     Context[] params = model.getParams();
-    int numfeats[] = new int[model.getNumOutcomes()];
+    int[] numfeats = new int[model.getNumOutcomes()];
     int[] activeOutcomes;
     double[] activeParameters;
     double value = 1;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
index ea26781..c0c8b1d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java
@@ -42,7 +42,7 @@ public interface MaxentModel {
      *         outcomes, all of which sum to 1.
      * @return an array of the probabilities for each of the different outcomes, all of which sum to 1.
      **/
-  double[] eval(String[] context, double probs[]);
+  double[] eval(String[] context, double[] probs);
 
   /**
    * Evaluates a contexts with the specified context values.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
index 3e4cef1..7a50055 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java
@@ -250,7 +250,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
         //training feature count computation
         for (int ei = 0; ei < events.length; ei++, oei++) {
           String[] contextStrings = events[ei].getContext();
-          float values[] = events[ei].getValues();
+          float[] values = events[ei].getValues();
           int oi = omap.get(events[ei].getOutcome());
           for (int ci = 0; ci < contextStrings.length; ci++) {
             float value = 1;
@@ -272,7 +272,7 @@ public class SimplePerceptronSequenceTrainer extends AbstractEventModelSequenceT
         // {System.err.print(" "+taggerEvents[ei].getOutcome());} System.err.println();
         for (Event taggerEvent : taggerEvents) {
           String[] contextStrings = taggerEvent.getContext();
-          float values[] = taggerEvent.getValues();
+          float[] values = taggerEvent.getValues();
           int oi = omap.get(taggerEvent.getOutcome());
           for (int ci = 0; ci < contextStrings.length; ci++) {
             float value = 1;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
index 284ae87..2218021 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java
@@ -76,7 +76,7 @@ public class BioCodec implements SequenceCodec<String> {
     return spans.toArray(new Span[spans.size()]);
   }
 
-  public String[] encode(Span names[], int length) {
+  public String[] encode(Span[] names, int length) {
     String[] outcomes = new String[length];
     for (int i = 0; i < outcomes.length; i++) {
       outcomes[i] = BioCodec.OTHER;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
index 7f913f9..83318e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java
@@ -36,7 +36,7 @@ import opennlp.tools.util.featuregen.WindowFeatureGenerator;
  */
 public class DefaultNameContextGenerator implements NameContextGenerator {
 
-  private AdaptiveFeatureGenerator featureGenerators[];
+  private AdaptiveFeatureGenerator[] featureGenerators;
 
   @Deprecated
   private static AdaptiveFeatureGenerator windowFeatures = new CachedFeatureGenerator(
@@ -73,7 +73,7 @@ public class DefaultNameContextGenerator implements NameContextGenerator {
   }
 
   public void addFeatureGenerator(AdaptiveFeatureGenerator generator) {
-    AdaptiveFeatureGenerator generators[] = featureGenerators;
+    AdaptiveFeatureGenerator[] generators = featureGenerators;
 
     featureGenerators = new AdaptiveFeatureGenerator[featureGenerators.length + 1];
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
index 8b655eb..d186ef9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
@@ -62,7 +62,7 @@ public class DictionaryNameFinder implements TokenNameFinder {
 
     for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) {
       Span nameFound = null;
-      String tokensSearching[];
+      String[] tokensSearching;
 
       for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) {
         int lengthSearching = offsetTo - offsetFrom + 1;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
index 0afd3c1..f67163c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
@@ -132,7 +132,7 @@ public class NameFinderEventStream extends opennlp.tools.util.AbstractEventStrea
       overrideType(names);
     }
 
-    String outcomes[] = codec.encode(names, sample.getSentence().length);
+    String[] outcomes = codec.encode(names, sample.getSentence().length);
     // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length);
     additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext());
     String[] tokens = new String[sample.getSentence().length];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 5a16f34..1d52473 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -302,7 +302,7 @@ public class NameFinderME implements TokenNameFinder {
    *
    * @return non-overlapping spans
    */
-  public static Span[] dropOverlappingSpans(Span spans[]) {
+  public static Span[] dropOverlappingSpans(Span[] spans) {
 
     List<Span> sortedSpans = new ArrayList<>(spans.length);
     Collections.addAll(sortedSpans, spans);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
index f2f4578..8858ceb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
@@ -179,7 +179,7 @@ public class NameSample {
     return result.toString();
   }
 
-  private static String errorTokenWithContext(String sentence[], int index) {
+  private static String errorTokenWithContext(String[] sentence, int index) {
 
     StringBuilder errorString = new StringBuilder();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
index cf19bf2..8064d6b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
@@ -86,8 +86,8 @@ public class NameSampleSequenceStream implements SequenceStream {
   public Sequence read() throws IOException {
     NameSample sample = psi.read();
     if (sample != null) {
-      String sentence[] = sample.getSentence();
-      String tags[] = seqCodec.encode(sample.getNames(), sentence.length);
+      String[] sentence = sample.getSentence();
+      String[] tags = seqCodec.encode(sample.getNames(), sentence.length);
       Event[] events = new Event[sentence.length];
 
       for (int i = 0; i < sentence.length; i++) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
index 5d9847d..7d7c6bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
@@ -32,7 +32,7 @@ import opennlp.tools.util.Span;
  */
 public final class RegexNameFinder implements TokenNameFinder {
 
-  private Pattern mPatterns[];
+  private Pattern[] mPatterns;
   private String sType;
   private Map<String, Pattern[]> regexMap;
 
@@ -40,7 +40,7 @@ public final class RegexNameFinder implements TokenNameFinder {
     this.regexMap = Objects.requireNonNull(regexMap, "regexMap must not be null");
   }
 
-  public RegexNameFinder(Pattern patterns[], String type) {
+  public RegexNameFinder(Pattern[] patterns, String type) {
     if (patterns == null || patterns.length == 0) {
       throw new IllegalArgumentException("patterns must not be null or empty!");
     }
@@ -55,7 +55,7 @@ public final class RegexNameFinder implements TokenNameFinder {
    * {@link #RegexNameFinder(Map)}
    */
   @Deprecated
-  public RegexNameFinder(Pattern patterns[]) {
+  public RegexNameFinder(Pattern[] patterns) {
     if (patterns == null || patterns.length == 0) {
       throw new IllegalArgumentException("patterns must not be null or empty!");
     }
@@ -65,7 +65,7 @@ public final class RegexNameFinder implements TokenNameFinder {
   }
 
   @Override
-  public Span[] find(String tokens[]) {
+  public Span[] find(String[] tokens) {
     Map<Integer, Integer> sentencePosTokenMap = new HashMap<>();
 
     StringBuilder sentenceString = new StringBuilder(tokens.length * 10);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
index 3b5162e..c9de988 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java
@@ -30,7 +30,7 @@ public interface TokenNameFinder {
    * @param tokens an array of the tokens or words of the sequence, typically a sentence.
    * @return an array of spans for each of the names identified.
    */
-  Span[] find(String tokens[]);
+  Span[] find(String[] tokens);
 
   /**
    * Forgets all adaptive data which was collected during previous

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
index 6a68b86..df8866f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
@@ -36,9 +36,9 @@ public class TokenNameFinderCrossValidator {
 
   private class DocumentSample {
 
-    private NameSample samples[];
+    private NameSample[] samples;
 
-    DocumentSample(NameSample samples[]) {
+    DocumentSample(NameSample[] samples) {
       this.samples = samples;
     }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
index d58527b..a84ebb8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java
@@ -73,8 +73,8 @@ public class TokenNameFinderEvaluator extends Evaluator<NameSample> {
       nameFinder.clearAdaptiveData();
     }
 
-    Span predictedNames[] = nameFinder.find(reference.getSentence());
-    Span references[] = reference.getNames();
+    Span[] predictedNames = nameFinder.find(reference.getSentence());
+    Span[] references = reference.getNames();
 
     // OPENNLP-396 When evaluating with a file in the old format
     // the type of the span is null, but must be set to default to match

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
index e7f0190..f570be3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
@@ -80,7 +80,7 @@ public class TokenNameFinderFactory extends BaseToolFactory {
         throw new IllegalStateException("Classpath must contain ner-default-features.xml file!");
       }
 
-      byte buf[] = new byte[1024];
+      byte[] buf = new byte[1024];
       int len;
       while ((len = in.read(buf)) > 0) {
         bytes.write(buf, 0, len);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index 09eefc5..5b72449 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -211,7 +211,7 @@ public class TokenNameFinderModel extends BaseModel {
 
   private boolean isModelValid(MaxentModel model) {
 
-    String outcomes[] = new String[model.getNumOutcomes()];
+    String[] outcomes = new String[model.getNumOutcomes()];
 
     for (int i = 0; i < model.getNumOutcomes(); i++) {
       outcomes[i] = model.getOutcome(i);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
index b0fc3e4..a553328 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
@@ -388,9 +388,9 @@ public abstract class AbstractBottomUpParser implements Parser {
   protected Parse[] advanceChunks(final Parse p, double minChunkScore) {
     // chunk
     Parse[] children = p.getChildren();
-    String words[] = new String[children.length];
-    String ptags[] = new String[words.length];
-    double probs[] = new double[words.length];
+    String[] words = new String[children.length];
+    String[] ptags = new String[words.length];
+    double[] probs = new double[words.length];
 
     for (int i = 0, il = children.length; i < il; i++) {
       Parse sp = children[i];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index b9733b6..7d37fcb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -160,7 +160,7 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
     features.add(ctbo0 + "," + ct1);
     features.add(ct0 + "," + ctbo1);
     features.add(ctbo0 + "," + ctbo1);
-    String contexts[] = features.toArray(new String[features.size()]);
+    String[] contexts = features.toArray(new String[features.size()]);
     if (contextsCache != null) {
       contextsCache.put(cacheKey,contexts);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
index b19d480..7d3c8f7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
@@ -35,7 +35,7 @@ public class ParserChunkerFactory extends ChunkerFactory {
 
     MaxentModel model = artifactProvider.getArtifact("chunker.model");
 
-    String outcomes[] = new String[model.getNumOutcomes()];
+    String[] outcomes = new String[model.getNumOutcomes()];
     for (int i = 0; i < outcomes.length; i++) {
       outcomes[i] = model.getOutcome(i);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fdff127b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
index 6b748a6..ef15bf5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
@@ -27,7 +27,7 @@ public class ParserChunkerSequenceValidator implements SequenceValidator<String>
 
   private Map<String, String> continueStartMap;
 
-  public ParserChunkerSequenceValidator(String outcomes[]) {
+  public ParserChunkerSequenceValidator(String[] outcomes) {
 
     continueStartMap = new HashMap<>(outcomes.length);
     for (int oi = 0, on = outcomes.length; oi < on; oi++) {


[22/50] [abbrv] opennlp git commit: OPENNLP-978: Set name finder defaults to perceptron and cutoff zero

Posted by jo...@apache.org.
OPENNLP-978: Set name finder defaults to perceptron and cutoff zero


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/91352d5f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/91352d5f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/91352d5f

Branch: refs/heads/parser_regression
Commit: 91352d5fe620ad3fab988222878d4953603db6e3
Parents: 672f1b0
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Feb 7 23:58:43 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .../namefind/TokenNameFinderCrossValidatorTool.java    |  4 ++--
 .../cmdline/namefind/TokenNameFinderTrainerTool.java   |  3 ++-
 .../main/java/opennlp/tools/namefind/NameFinderME.java |  6 ++++++
 .../java/opennlp/tools/util/TrainingParameters.java    | 13 +++++++++++++
 .../java/opennlp/tools/namefind/NameFinderMETest.java  |  1 +
 5 files changed, 24 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
index 333abd9..153d6f7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
@@ -42,8 +42,8 @@ import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
 import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.EvaluationMonitor;
-import opennlp.tools.util.model.ModelUtil;
 
 public final class TokenNameFinderCrossValidatorTool
     extends AbstractCrossValidatorTool<NameSample, CVToolParams> {
@@ -65,7 +65,7 @@ public final class TokenNameFinderCrossValidatorTool
 
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
     if (mlParams == null) {
-      mlParams = ModelUtil.createDefaultTrainingParameters();
+      mlParams = new TrainingParameters();
     }
 
     byte featureGeneratorBytes[] =

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index a8d4417..fb73506 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -40,6 +40,7 @@ import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.featuregen.GeneratorFactory;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.ModelUtil;
@@ -166,7 +167,7 @@ public final class TokenNameFinderTrainerTool
 
     mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
     if (mlParams == null) {
-      mlParams = ModelUtil.createDefaultTrainingParameters();
+      mlParams = new TrainingParameters();
     }
 
     File modelOutFile = params.getModel();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
index 6ce0b83..5a16f34 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
@@ -37,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.ml.perceptron.PerceptronTrainer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
 import opennlp.tools.util.SequenceCodec;
@@ -219,6 +220,11 @@ public class NameFinderME implements TokenNameFinder {
   public static TokenNameFinderModel train(String languageCode, String type,
           ObjectStream<NameSample> samples, TrainingParameters trainParams,
           TokenNameFinderFactory factory) throws IOException {
+
+    trainParams.putIfAbsent(TrainingParameters.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
+    trainParams.putIfAbsent(TrainingParameters.CUTOFF_PARAM, "0");
+    trainParams.putIfAbsent(TrainingParameters.ITERATIONS_PARAM, "300");
+
     String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER);
 
     int beamSize = NameFinderME.DEFAULT_BEAM_SIZE;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
index 188446c..3f21623 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
@@ -130,6 +130,19 @@ public class TrainingParameters {
     return params;
   }
 
+  public void putIfAbsent(String namespace, String key, String value) {
+    if (namespace == null) {
+      parameters.putIfAbsent(key, value);
+    }
+    else {
+      parameters.putIfAbsent(namespace + "." + key, value);
+    }
+  }
+
+  public void putIfAbsent(String key, String value) {
+    putIfAbsent(null, key, value);
+  }
+
   public void put(String namespace, String key, String value) {
 
     if (namespace == null) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/91352d5f/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
index eded5c5..494af62 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
@@ -285,6 +285,7 @@ public class NameFinderMETest {
         new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
 
     TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
     params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
     params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
 


[28/50] [abbrv] opennlp git commit: OPENNLP-229: Add test for NameFinderSequenceValidator

Posted by jo...@apache.org.
OPENNLP-229: Add test for NameFinderSequenceValidator

This closes #125


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/40cdacb5
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/40cdacb5
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/40cdacb5

Branch: refs/heads/parser_regression
Commit: 40cdacb55583cf70d7e47b26fc0108fa71f3ab51
Parents: ebb5b24
Author: Peter Thygesen <pe...@gmail.com>
Authored: Wed Feb 15 21:12:48 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 .../NameFinderSequenceValidatorTest.java        | 186 +++++++++++++++++++
 1 file changed, 186 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/40cdacb5/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
new file mode 100644
index 0000000..35752c1
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderSequenceValidatorTest.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.namefind;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * This is the test class for {@link NameFinderSequenceValidator}.
+ */
+public class NameFinderSequenceValidatorTest {
+
+  private static NameFinderSequenceValidator validator = new NameFinderSequenceValidator();
+  private static String START_A = "TypeA-" + NameFinderME.START;
+  private static String CONTINUE_A = "TypeA-" + NameFinderME.CONTINUE;
+  private static String START_B = "TypeB-" + NameFinderME.START;
+  private static String CONTINUE_B = "TypeB-" + NameFinderME.CONTINUE;
+  private static String OTHER = NameFinderME.OTHER;
+
+  @Test
+  public void testContinueCannotBeFirstOutcome() {
+
+    final String outcome = CONTINUE_A;
+
+    String[] inputSequence = new String[] {"PersonA", "is", "here"};
+    String[] outcomesSequence = new String[] {};
+    Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+  }
+
+  @Test
+  public void testContinueAfterStartAndSameType() {
+
+    final String outcome = CONTINUE_A;
+
+    // previous start, same name type
+    String[] inputSequence = new String[] {"Stefanie", "Schmidt", "is", "German"};
+    String[] outcomesSequence = new String[] {START_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+  }
+
+  @Ignore
+  @Test
+  public void testContinueAfterStartAndNotSameType() {
+
+    final String outcome = CONTINUE_B;
+
+    // previous start, not same name type
+    String[] inputSequence = new String[] {"PersonA", "LocationA", "something"};
+    String[] outcomesSequence = new String[] {START_A};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterContinueAndSameType() {
+
+    final String outcome = CONTINUE_A;
+
+    // previous continue, same name type
+    String[] inputSequence = new String[] {"FirstName", "MidleName", "LastName", "is", "a", "long", "name"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterContinueAndNotSameType() {
+
+    final String outcome = CONTINUE_B;
+
+    // previous continue, not same name type
+    String[] inputSequence = new String[] {"FirstName", "LastName", "LocationA", "something"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAfterOther() {
+
+    final String outcome = CONTINUE_A;
+
+    // previous other
+    String[] inputSequence = new String[] {"something", "is", "wrong", "here"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testStartIsAlwaysAValidOutcome() {
+
+    final String outcome = START_A;
+
+    // pos zero
+    String[] inputSequence = new String[] {"PersonA", "is", "here"};
+    String[] outcomesSequence = new String[] {};
+    Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+    // pos one, previous other
+    inputSequence = new String[] {"it's", "PersonA", "again"};
+    outcomesSequence = new String[] {OTHER};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // pos one, previous start
+    inputSequence = new String[] {"PersonA", "PersonB", "something"};
+    outcomesSequence = new String[] {START_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous other
+    inputSequence = new String[] {"here", "is", "PersonA"};
+    outcomesSequence = new String[] {OTHER, OTHER};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous start, same name type
+    inputSequence = new String[] {"is", "PersonA", "PersoneB"};
+    outcomesSequence = new String[] {OTHER, START_A};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous start, different name type
+    inputSequence = new String[] {"something", "PersonA", "OrganizationA"};
+    outcomesSequence = new String[] {OTHER, START_B};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous continue, same name type
+    inputSequence = new String[] {"Stefanie", "Schmidt", "PersonB", "something"};
+    outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous continue, not same name type
+    inputSequence = new String[] {"Stefanie", "Schmidt", "OrganizationA", "something"};
+    outcomesSequence = new String[] {START_B, CONTINUE_B};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+  }
+
+  @Test
+  public void testOtherIsAlwaysAValidOutcome() {
+
+    final String outcome = OTHER;
+
+    // pos zero
+    String[] inputSequence = new String[] {"it's", "a", "test"};
+    String[] outcomesSequence = new String[] {};
+    Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+
+    // pos one, previous other
+    inputSequence = new String[] {"it's", "a", "test"};
+    outcomesSequence = new String[] {OTHER};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // pos one, previous start
+    inputSequence = new String[] {"Mike", "is", "here"};
+    outcomesSequence = new String[] {START_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous other
+    inputSequence = new String[] {"it's", "a", "test"};
+    outcomesSequence = new String[] {OTHER, OTHER};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous start
+    inputSequence = new String[] {"is", "Mike", "here"};
+    outcomesSequence = new String[] {OTHER, START_A};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // pos two, previous continue
+    inputSequence = new String[] {"Stefanie", "Schmidt", "lives", "at", "home"};
+    outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertTrue(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+  }
+
+}


[33/50] [abbrv] opennlp git commit: OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135

Posted by jo...@apache.org.
OPENNLP-997: Exclude the generated stemmer code from the coverage report, this closes apache/opennlp#135


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/20d0a76f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/20d0a76f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/20d0a76f

Branch: refs/heads/parser_regression
Commit: 20d0a76fe092993c25abf7aa3dfce34bcb72db5f
Parents: 76609f5
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 28 08:28:05 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 pom.xml | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/20d0a76f/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 45d3c37..12c9ee6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -126,7 +126,7 @@
 		<checkstyle.plugin.version>2.17</checkstyle.plugin.version>
 		<opennlp.forkCount>1.0C</opennlp.forkCount>
 		<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin>
-		<jacoco.maven.plugin>0.7.8</jacoco.maven.plugin>
+		<jacoco.maven.plugin>0.7.9</jacoco.maven.plugin>
 		<maven.surefire.plugin>2.19.1</maven.surefire.plugin>
 	</properties>
 
@@ -185,7 +185,13 @@
 				<plugin>
 					<groupId>org.jacoco</groupId>
 					<artifactId>jacoco-maven-plugin</artifactId>
-					<version>0.7.8</version>
+					<version>${jacoco.maven.plugin}</version>
+					<configuration>
+						<excludes>
+                            <exclude>**/stemmer/*</exclude>
+							<exclude>**/stemmer/snowball/*</exclude>
+                        </excludes>
+					</configuration>
 					<executions>
 						<execution>
 							<id>jacoco-prepare-agent</id>
@@ -222,6 +228,10 @@
 					<version>${maven.surefire.plugin}</version>
 					<configuration>
 						<forkCount>${opennlp.forkCount}</forkCount>
+						<excludes>
+							<exclude>**/stemmer/*</exclude>
+							<exclude>**/stemmer/snowball/*</exclude>
+						</excludes>
 					</configuration>
 				</plugin>
 


[25/50] [abbrv] opennlp git commit: OPENNLP-996: Remove heap memory settings from opennlp-tools

Posted by jo...@apache.org.
OPENNLP-996: Remove heap memory settings from opennlp-tools


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/81acc6e6
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/81acc6e6
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/81acc6e6

Branch: refs/heads/parser_regression
Commit: 81acc6e69a7120b3f9644d54c30cae34b02b78f1
Parents: 82caa55
Author: smarthi <sm...@apache.org>
Authored: Sun Feb 26 12:56:04 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 opennlp-tools/pom.xml | 5 -----
 1 file changed, 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/81acc6e6/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index d2630c9..663e903 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -33,10 +33,6 @@
   <packaging>bundle</packaging>
   <name>Apache OpenNLP Tools</name>
 
-  <properties>
-    <argLine>-Xmx4096m</argLine>
-  </properties>
-
   <dependencies>
     <dependency>
       <groupId>org.osgi</groupId>
@@ -81,7 +77,6 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
-          <argLine>@{argLine}</argLine>
           <excludes>
             <exclude>/opennlp/tools/eval/**/*</exclude>
           </excludes>


[02/50] [abbrv] opennlp git commit: NoJira: Fix Coveralls Report, this closes apache/opennlp#116

Posted by jo...@apache.org.
NoJira: Fix Coveralls Report, this closes apache/opennlp#116


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6ecc17e8
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6ecc17e8
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6ecc17e8

Branch: refs/heads/parser_regression
Commit: 6ecc17e88b096cd7a12f65b869d9ce6a9444727e
Parents: 6f33261
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 7 22:46:28 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 README.md             |  5 ++---
 opennlp-tools/pom.xml | 10 +++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index faff141..2d31eb1 100644
--- a/README.md
+++ b/README.md
@@ -20,10 +20,9 @@ Welcome to Apache OpenNLP!
 
 [![Build Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp)
 [![Coverage Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master)
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
 [![Documentation Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html)
 [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE)
-[![GitHub forks](https://img.shields.io/github/forks/apache/opennlp.svg)](https://github.com/apache/opennlp/network)
-[![GitHub stars](https://img.shields.io/github/stars/apache/opennlp.svg)](https://github.com/apache/opennlp/stargazers)
 [![Twitter Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP)
 
 The Apache OpenNLP library is a machine learning based toolkit for the processing of natural language text.
@@ -38,7 +37,7 @@ well as the annotated text resources that those models are derived from.
 
 For additional information about OpenNLP, visit the [OpenNLP Home Page](http://opennlp.apache.org/)
 
-Documentation for OpenNLP, including JavaDocs, code usage and command line interface are available[here](http://opennlp.apache.org/documentation.html)
+Documentation for OpenNLP, including JavaDocs, code usage and command line interface are available [here](http://opennlp.apache.org/documentation.html)
 
 ####Using OpenNLP as a Library
 Running any application that uses OpenNLP will require installing a binary or source version and setting the environment.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/6ecc17e8/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 22fc017..c7e9624 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -33,6 +33,10 @@
   <packaging>bundle</packaging>
   <name>Apache OpenNLP Tools</name>
 
+  <properties>
+    <argLine>-Xmx4096m</argLine>
+  </properties>
+
   <dependencies>
     <dependency>
       <groupId>org.osgi</groupId>
@@ -41,7 +45,7 @@
       <scope>provided</scope>
       <optional>true</optional>
     </dependency>
-        
+
     <dependency>
       <groupId>org.osgi</groupId>
       <artifactId>org.osgi.compendium</artifactId>
@@ -49,7 +53,7 @@
       <scope>provided</scope>
       <optional>true</optional>
     </dependency>
-        
+
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
@@ -77,7 +81,7 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
-          <argLine>-Xmx4096m</argLine>
+          <argLine>@{argLine}</argLine>
           <excludes>
             <exclude>/opennlp/tools/eval/**/*</exclude>
           </excludes>


[38/50] [abbrv] opennlp git commit: OPENNLP-1002 Remove deprecated GIS class

Posted by jo...@apache.org.
OPENNLP-1002 Remove deprecated GIS class


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/fc10d2e9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/fc10d2e9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/fc10d2e9

Branch: refs/heads/parser_regression
Commit: fc10d2e9ec3c98e93bdae7d503f1e09848a28a6a
Parents: 1b6ad71
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sun Mar 12 11:10:43 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 .../cmdline/parser/BuildModelUpdaterTool.java   |   7 +-
 .../cmdline/parser/CheckModelUpdaterTool.java   |   7 +-
 .../main/java/opennlp/tools/ml/maxent/GIS.java  | 303 -------------------
 .../tools/ml/maxent/GISIndexingTest.java        |  78 +++--
 .../tools/ml/maxent/ScaleDoesntMatterTest.java  |  20 +-
 5 files changed, 80 insertions(+), 335 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
index 327355b..7efd342 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
 import java.io.IOException;
 
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
 import opennlp.tools.parser.chunking.ParserEventStream;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
 
 public final class BuildModelUpdaterTool extends ModelUpdaterTool {
 
@@ -50,7 +52,8 @@ public final class BuildModelUpdaterTool extends ModelUpdaterTool {
     ObjectStream<Event> bes = new ParserEventStream(parseSamples,
         originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict);
 
-    GIS trainer = new GIS();
+    EventTrainer trainer = TrainerFactory.getEventTrainer(
+        ModelUtil.createDefaultTrainingParameters(), null);
     MaxentModel buildModel = trainer.train(bes);
 
     parseSamples.close();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
index 55e96ba..0c98812 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java
@@ -20,7 +20,8 @@ package opennlp.tools.cmdline.parser;
 import java.io.IOException;
 
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.parser.Parse;
@@ -28,6 +29,7 @@ import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
 import opennlp.tools.parser.chunking.ParserEventStream;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.model.ModelUtil;
 
 // trains a new check model ...
 public final class CheckModelUpdaterTool extends ModelUpdaterTool {
@@ -51,7 +53,8 @@ public final class CheckModelUpdaterTool extends ModelUpdaterTool {
     ObjectStream<Event> bes = new ParserEventStream(parseSamples,
         originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict);
 
-    GIS trainer = new GIS();
+    EventTrainer trainer = TrainerFactory.getEventTrainer(
+        ModelUtil.createDefaultTrainingParameters(), null);
     MaxentModel checkModel = trainer.train(bes);
 
     parseSamples.close();
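
Both tools above swap the deprecated GIS trainer for one obtained from TrainerFactory. For reference, a minimal sketch of that pattern on its own, assuming an existing ObjectStream<Event>; the class and method names here are illustrative, not part of the commit:

import java.io.IOException;

import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.model.ModelUtil;

class EventTrainerSketch {

  // Same pattern as the two tools above: obtain an EventTrainer with the
  // default training parameters and train a MaxentModel from an event stream.
  static MaxentModel train(ObjectStream<Event> events) throws IOException {
    EventTrainer trainer = TrainerFactory.getEventTrainer(
        ModelUtil.createDefaultTrainingParameters(), null);
    return trainer.train(events);
  }
}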

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
deleted file mode 100644
index 97c214d..0000000
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.ml.maxent;
-
-import java.io.IOException;
-
-import opennlp.tools.ml.AbstractEventTrainer;
-import opennlp.tools.ml.model.AbstractModel;
-import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.ml.model.Prior;
-import opennlp.tools.ml.model.UniformPrior;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * A Factory class which uses instances of GISTrainer to create and train
- * GISModels.
- * @deprecated use {@link GISTrainer}
- */
-@Deprecated
-public class GIS extends AbstractEventTrainer {
-
-  public static final String MAXENT_VALUE = "MAXENT";
-
-  /**
-   * Set this to false if you don't want messages about the progress of model
-   * training displayed. Alternately, you can use the overloaded version of
-   * trainModel() to conditionally enable progress messages.
-   */
-  public static boolean PRINT_MESSAGES = true;
-
-  /**
-   * If we are using smoothing, this is used as the "number" of times we want
-   * the trainer to imagine that it saw a feature that it actually didn't see.
-   * Defaulted to 0.1.
-   */
-  private static final double SMOOTHING_OBSERVATION = 0.1;
-
-  private static final String SMOOTHING_PARAM = "smoothing";
-  private static final boolean SMOOTHING_DEFAULT = false;
-
-  public GIS() {
-  }
-
-  public GIS(TrainingParameters parameters) {
-    super(parameters);
-  }
-  
-  public boolean isValid() {
-
-    if (!super.isValid()) {
-      return false;
-    }
-
-    String algorithmName = getAlgorithm();
-
-    return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
-  }
-
-  public boolean isSortAndMerge() {
-    return true;
-  }
-
-  public AbstractModel doTrain(DataIndexer indexer) throws IOException {
-    int iterations = getIterations();
-
-    AbstractModel model;
-
-    boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
-    boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
-    int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
-
-    model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
-
-    return model;
-  }
-
-  // << members related to AbstractEventTrainer
-
-  /**
-   * Train a model using the GIS algorithm, assuming 100 iterations and no
-   * cutoff.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
-    return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm, assuming 100 iterations and no
-   * cutoff.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
-      throws IOException {
-    return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff) throws IOException {
-    return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
-      throws IOException {
-    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
-    trainer.setSmoothing(smoothing);
-    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
-    return trainer.trainModel(eventStream, iterations, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param eventStream
-   *          The EventStream holding the data on which this model will be
-   *          trained.
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param cutoff
-   *          The number of times a feature must be seen in order to be relevant
-   *          for training.
-   * @param sigma
-   *          The standard deviation for the gaussian smoother.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
-      int cutoff, double sigma) throws IOException {
-    GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
-    if (sigma > 0) {
-      trainer.setGaussianSigma(sigma);
-    }
-    return trainer.trainModel(eventStream, iterations, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
-    return trainModel(iterations, indexer, true, smoothing, null, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer) {
-    return trainModel(iterations, indexer, true, false, null, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm with the specified number of
-   * iterations, data indexer, and prior.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-      Prior modelPrior, int cutoff) {
-    return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-                                    boolean printMessagesWhileTraining, boolean smoothing,
-                                    Prior modelPrior) {
-    return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
-  }
-
-  /**
-   * Train a model using the GIS algorithm.
-   *
-   * @param iterations
-   *          The number of GIS iterations to perform.
-   * @param indexer
-   *          The object which will be used for event compilation.
-   * @param printMessagesWhileTraining
-   *          Determines whether training status messages are written to STDOUT.
-   * @param smoothing
-   *          Defines whether the created trainer will use smoothing while
-   *          training the model.
-   * @param modelPrior
-   *          The prior distribution for the model.
-   * @return The newly trained model, which can be used immediately or saved to
-   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
-   */
-  public static GISModel trainModel(int iterations, DataIndexer indexer,
-                                    boolean printMessagesWhileTraining, boolean smoothing,
-                                    Prior modelPrior, int threads) {
-    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
-    trainer.setSmoothing(smoothing);
-    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
-    if (modelPrior == null) {
-      modelPrior = new UniformPrior();
-    }
-    return trainer.trainModel(iterations, indexer, modelPrior, threads);
-  }
-}
-
-
-

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
index 6922603..c8bc27f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/GISIndexingTest.java
@@ -17,6 +17,7 @@
 
 package opennlp.tools.ml.maxent;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -26,6 +27,7 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.AbstractTrainer;
 import opennlp.tools.ml.EventTrainer;
 import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.maxent.quasinewton.QNTrainer;
@@ -36,6 +38,7 @@ import opennlp.tools.ml.model.Event;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
 import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
 
 public class GISIndexingTest {
 
@@ -58,40 +61,63 @@ public class GISIndexingTest {
    * Test the GIS.trainModel(ObjectStream<Event> eventStream) method
    */
   @Test
-  public void testGISTrainSignature1() throws Exception {
-    ObjectStream<Event> eventStream = createEventStream();
-    Assert.assertNotNull(GIS.trainModel(eventStream));
-    eventStream.close();
+  public void testGISTrainSignature1() throws IOException {
+    try (ObjectStream<Event> eventStream = createEventStream()) {
+      TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+      params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+
+      EventTrainer trainer = TrainerFactory.getEventTrainer(params,  null);
+
+      Assert.assertNotNull(trainer.train(eventStream));
+    }
   }
 
   /*
    * Test the GIS.trainModel(ObjectStream<Event> eventStream,boolean smoothing) method
    */
   @Test
-  public void testGISTrainSignature2() throws Exception {
-    ObjectStream<Event> eventStream = createEventStream();
-    Assert.assertNotNull(GIS.trainModel(eventStream,true));
-    eventStream.close();
+  public void testGISTrainSignature2() throws IOException {
+    try (ObjectStream<Event> eventStream = createEventStream()) {
+      TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+      params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+      params.put("smoothing", "true");
+      EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+
+      Assert.assertNotNull(trainer.train(eventStream));
+    }
   }
   
   /*
    * Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff) method
    */
   @Test
-  public void testGISTrainSignature3() throws Exception {
-    ObjectStream<Event> eventStream = createEventStream();
-    Assert.assertNotNull(GIS.trainModel(eventStream,10,1));
-    eventStream.close();
+  public void testGISTrainSignature3() throws IOException {
+    try (ObjectStream<Event> eventStream = createEventStream()) {
+      TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+      params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+      params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+
+      EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+
+      Assert.assertNotNull(trainer.train(eventStream));
+    }
   }
  
   /*
    * Test the GIS.trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff, double sigma) method
    */
   @Test
-  public void testGISTrainSignature4() throws Exception {
-    ObjectStream<Event> eventStream = createEventStream();
-    Assert.assertNotNull(GIS.trainModel(eventStream,10,1,0.01));
-    eventStream.close();
+  public void testGISTrainSignature4() throws IOException {
+    try (ObjectStream<Event> eventStream = createEventStream()) {
+      TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+      params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+      params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+      GISTrainer trainer = (GISTrainer) TrainerFactory.getEventTrainer(params, null);
+      trainer.setGaussianSigma(0.01);
+
+      Assert.assertNotNull(trainer.trainModel(eventStream));
+    }
   }
   
   /*
@@ -99,14 +125,22 @@ public class GISIndexingTest {
    * boolean smoothing, boolean printMessagesWhileTraining)) method
    */
   @Test
-  public void testGISTrainSignature5() throws Exception {
-    ObjectStream<Event> eventStream = createEventStream();
-    Assert.assertNotNull(GIS.trainModel(eventStream,10,1,false,false));
-    eventStream.close();
+  public void testGISTrainSignature5() throws IOException {
+    try (ObjectStream<Event> eventStream = createEventStream()) {
+      TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
+
+      params.put(AbstractTrainer.ITERATIONS_PARAM, "10");
+      params.put(AbstractTrainer.CUTOFF_PARAM, "1");
+      params.put("smoothing", "false");
+      params.put(AbstractTrainer.VERBOSE_PARAM, "false");
+
+      EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
+      Assert.assertNotNull(trainer.train(eventStream));
+    }
   }
   
   @Test
-  public void testIndexingWithTrainingParameters() throws Exception {
+  public void testIndexingWithTrainingParameters() throws IOException {
     ObjectStream<Event> eventStream = createEventStream();
     
     TrainingParameters parameters = TrainingParameters.defaultParams();
@@ -147,7 +181,7 @@ public class GISIndexingTest {
   }
   
   @Test
-  public void testIndexingFactory() throws Exception {
+  public void testIndexingFactory() throws IOException {
     Map<String,String> myReportMap = new HashMap<>();
     ObjectStream<Event> eventStream = createEventStream();
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/fc10d2e9/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
index 76a4813..1e5c8a3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
@@ -25,6 +25,8 @@ import org.junit.Before;
 import org.junit.Test;
 
 import opennlp.tools.ml.AbstractTrainer;
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
@@ -34,6 +36,7 @@ import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelUtil;
 
 public class ScaleDoesntMatterTest {
 
@@ -52,7 +55,6 @@ public class ScaleDoesntMatterTest {
    * predicates doesn't matter when it comes the probability assigned to each
    * outcome. Strangely, if we use (1,2) and (10,20) there's no difference. If
    * we use (0.1,0.2) and (10,20) there is a difference.
-   *
    */
   @Test
   public void testScaleResults() throws Exception {
@@ -68,8 +70,11 @@ public class ScaleDoesntMatterTest {
         new PlainTextByLineStream(new MockInputStreamFactory(smallValues), StandardCharsets.UTF_8));
 
     testDataIndexer.index(smallEventStream);
-    MaxentModel smallModel = GIS.trainModel(100,
-        testDataIndexer, false);
+
+    EventTrainer smallModelTrainer = TrainerFactory.getEventTrainer(
+        ModelUtil.createDefaultTrainingParameters(), null);
+
+    MaxentModel smallModel = smallModelTrainer.train(testDataIndexer);
     String[] contexts = smallTest.split(" ");
     float[] values = RealValueFileEventStream.parseContexts(contexts);
     double[] smallResults = smallModel.eval(contexts, values);
@@ -81,13 +86,16 @@ public class ScaleDoesntMatterTest {
         new PlainTextByLineStream(new MockInputStreamFactory(largeValues), StandardCharsets.UTF_8));
 
     testDataIndexer.index(largeEventStream);
-    MaxentModel largeModel = GIS.trainModel(100,
-        testDataIndexer, false);
+
+    EventTrainer largeModelTrainer = TrainerFactory.getEventTrainer(
+        ModelUtil.createDefaultTrainingParameters(), null);
+
+    MaxentModel largeModel = largeModelTrainer.train(testDataIndexer);
     contexts = largeTest.split(" ");
     values = RealValueFileEventStream.parseContexts(contexts);
     double[] largeResults = largeModel.eval(contexts, values);
 
-    String largeResultString = smallModel.getAllOutcomes(largeResults);
+    String largeResultString = largeModel.getAllOutcomes(largeResults);
     System.out.println("largeResults: " + largeResultString);
 
     Assert.assertEquals(smallResults.length, largeResults.length);


[32/50] [abbrv] opennlp git commit: Revert "OPENNLP-1002 Remove deprecated GIS class"

Posted by jo...@apache.org.
Revert "OPENNLP-1002 Remove deprecated GIS class"

This reverts commit efa257676280abd316bb677e5a8de5cb9fe1dd73.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1b6ad719
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1b6ad719
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1b6ad719

Branch: refs/heads/parser_regression
Commit: 1b6ad719760ead028810715b49ff15219385ee42
Parents: 7487812
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Mar 10 17:22:28 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 .../main/java/opennlp/tools/ml/maxent/GIS.java  | 303 +++++++++++++++++++
 1 file changed, 303 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/1b6ad719/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
new file mode 100644
index 0000000..97c214d
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GIS.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.maxent;
+
+import java.io.IOException;
+
+import opennlp.tools.ml.AbstractEventTrainer;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.Prior;
+import opennlp.tools.ml.model.UniformPrior;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * A Factory class which uses instances of GISTrainer to create and train
+ * GISModels.
+ * @deprecated use {@link GISTrainer}
+ */
+@Deprecated
+public class GIS extends AbstractEventTrainer {
+
+  public static final String MAXENT_VALUE = "MAXENT";
+
+  /**
+   * Set this to false if you don't want messages about the progress of model
+   * training displayed. Alternately, you can use the overloaded version of
+   * trainModel() to conditionally enable progress messages.
+   */
+  public static boolean PRINT_MESSAGES = true;
+
+  /**
+   * If we are using smoothing, this is used as the "number" of times we want
+   * the trainer to imagine that it saw a feature that it actually didn't see.
+   * Defaulted to 0.1.
+   */
+  private static final double SMOOTHING_OBSERVATION = 0.1;
+
+  private static final String SMOOTHING_PARAM = "smoothing";
+  private static final boolean SMOOTHING_DEFAULT = false;
+
+  public GIS() {
+  }
+
+  public GIS(TrainingParameters parameters) {
+    super(parameters);
+  }
+  
+  public boolean isValid() {
+
+    if (!super.isValid()) {
+      return false;
+    }
+
+    String algorithmName = getAlgorithm();
+
+    return !(algorithmName != null && !(MAXENT_VALUE.equals(algorithmName)));
+  }
+
+  public boolean isSortAndMerge() {
+    return true;
+  }
+
+  public AbstractModel doTrain(DataIndexer indexer) throws IOException {
+    int iterations = getIterations();
+
+    AbstractModel model;
+
+    boolean printMessages = trainingParameters.getBooleanParameter(VERBOSE_PARAM, VERBOSE_DEFAULT);
+    boolean smoothing = trainingParameters.getBooleanParameter(SMOOTHING_PARAM, SMOOTHING_DEFAULT);
+    int threads = trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
+
+    model = trainModel(iterations, indexer, printMessages, smoothing, null, threads);
+
+    return model;
+  }
+
+  // << members related to AbstractEventTrainer
+
+  /**
+   * Train a model using the GIS algorithm, assuming 100 iterations and no
+   * cutoff.
+   *
+   * @param eventStream
+   *          The EventStream holding the data on which this model will be
+   *          trained.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
+    return trainModel(eventStream, 100, 0, false, PRINT_MESSAGES);
+  }
+
+  /**
+   * Train a model using the GIS algorithm, assuming 100 iterations and no
+   * cutoff.
+   *
+   * @param eventStream
+   *          The EventStream holding the data on which this model will be
+   *          trained.
+   * @param smoothing
+   *          Defines whether the created trainer will use smoothing while
+   *          training the model.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream<Event> eventStream, boolean smoothing)
+      throws IOException {
+    return trainModel(eventStream, 100, 0, smoothing, PRINT_MESSAGES);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param eventStream
+   *          The EventStream holding the data on which this model will be
+   *          trained.
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param cutoff
+   *          The number of times a feature must be seen in order to be relevant
+   *          for training.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+      int cutoff) throws IOException {
+    return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param eventStream
+   *          The EventStream holding the data on which this model will be
+   *          trained.
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param cutoff
+   *          The number of times a feature must be seen in order to be relevant
+   *          for training.
+   * @param smoothing
+   *          Defines whether the created trainer will use smoothing while
+   *          training the model.
+   * @param printMessagesWhileTraining
+   *          Determines whether training status messages are written to STDOUT.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+      int cutoff, boolean smoothing, boolean printMessagesWhileTraining)
+      throws IOException {
+    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
+    trainer.setSmoothing(smoothing);
+    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+    return trainer.trainModel(eventStream, iterations, cutoff);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param eventStream
+   *          The EventStream holding the data on which this model will be
+   *          trained.
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param cutoff
+   *          The number of times a feature must be seen in order to be relevant
+   *          for training.
+   * @param sigma
+   *          The standard deviation for the gaussian smoother.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(ObjectStream<Event> eventStream, int iterations,
+      int cutoff, double sigma) throws IOException {
+    GISTrainer trainer = new GISTrainer(PRINT_MESSAGES);
+    if (sigma > 0) {
+      trainer.setGaussianSigma(sigma);
+    }
+    return trainer.trainModel(eventStream, iterations, cutoff);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param indexer
+   *          The object which will be used for event compilation.
+   * @param smoothing
+   *          Defines whether the created trainer will use smoothing while
+   *          training the model.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) {
+    return trainModel(iterations, indexer, true, smoothing, null, 1);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param indexer
+   *          The object which will be used for event compilation.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(int iterations, DataIndexer indexer) {
+    return trainModel(iterations, indexer, true, false, null, 1);
+  }
+
+  /**
+   * Train a model using the GIS algorithm with the specified number of
+   * iterations, data indexer, and prior.
+   *
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param indexer
+   *          The object which will be used for event compilation.
+   * @param modelPrior
+   *          The prior distribution for the model.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(int iterations, DataIndexer indexer,
+      Prior modelPrior, int cutoff) {
+    return trainModel(iterations, indexer, true, false, modelPrior, cutoff);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param indexer
+   *          The object which will be used for event compilation.
+   * @param printMessagesWhileTraining
+   *          Determines whether training status messages are written to STDOUT.
+   * @param smoothing
+   *          Defines whether the created trainer will use smoothing while
+   *          training the model.
+   * @param modelPrior
+   *          The prior distribution for the model.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(int iterations, DataIndexer indexer,
+                                    boolean printMessagesWhileTraining, boolean smoothing,
+                                    Prior modelPrior) {
+    return trainModel(iterations, indexer, printMessagesWhileTraining, smoothing, modelPrior, 1);
+  }
+
+  /**
+   * Train a model using the GIS algorithm.
+   *
+   * @param iterations
+   *          The number of GIS iterations to perform.
+   * @param indexer
+   *          The object which will be used for event compilation.
+   * @param printMessagesWhileTraining
+   *          Determines whether training status messages are written to STDOUT.
+   * @param smoothing
+   *          Defines whether the created trainer will use smoothing while
+   *          training the model.
+   * @param modelPrior
+   *          The prior distribution for the model.
+   * @return The newly trained model, which can be used immediately or saved to
+   *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
+   */
+  public static GISModel trainModel(int iterations, DataIndexer indexer,
+                                    boolean printMessagesWhileTraining, boolean smoothing,
+                                    Prior modelPrior, int threads) {
+    GISTrainer trainer = new GISTrainer(printMessagesWhileTraining);
+    trainer.setSmoothing(smoothing);
+    trainer.setSmoothingObservation(SMOOTHING_OBSERVATION);
+    if (modelPrior == null) {
+      modelPrior = new UniformPrior();
+    }
+    return trainer.trainModel(iterations, indexer, modelPrior, threads);
+  }
+}
+
+
+
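
The revert restores the static convenience methods of the deprecated GIS class. A minimal sketch of the simplest restored entry point, assuming an existing event stream; the class and method names are illustrative:

import java.io.IOException;

import opennlp.tools.ml.maxent.GIS;
import opennlp.tools.ml.maxent.GISModel;
import opennlp.tools.ml.model.Event;
import opennlp.tools.util.ObjectStream;

class DeprecatedGisSketch {

  // The static entry point restored by this revert; per the Javadoc above it
  // trains with 100 iterations, no cutoff and no smoothing.
  @SuppressWarnings("deprecation")
  static GISModel train(ObjectStream<Event> events) throws IOException {
    return GIS.trainModel(events);
  }
}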


[36/50] [abbrv] opennlp git commit: OPENNLP-125: Make POS Tagger feature generation configurable

Posted by jo...@apache.org.
OPENNLP-125: Make POS Tagger feature generation configurable


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/dd39d066
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/dd39d066
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/dd39d066

Branch: refs/heads/parser_regression
Commit: dd39d06629294f3c9bd3980d02ba0f1716839e0e
Parents: 711d70b
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Feb 9 18:54:27 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 .../namefind/TokenNameFinderTrainerTool.java    |   2 +-
 .../postag/POSTaggerCrossValidatorTool.java     |  10 +-
 .../cmdline/postag/POSTaggerTrainerTool.java    |  26 +--
 .../tools/cmdline/postag/TrainingParams.java    |  13 +-
 .../postag/ConfigurablePOSContextGenerator.java | 105 +++++++++++
 .../opennlp/tools/postag/POSDictionary.java     |   8 +-
 .../java/opennlp/tools/postag/POSModel.java     |  40 +++--
 .../tools/postag/POSTaggerCrossValidator.java   |  44 ++---
 .../opennlp/tools/postag/POSTaggerFactory.java  | 179 ++++++++++++++++++-
 .../tools/util/featuregen/GeneratorFactory.java |  12 ++
 .../featuregen/PosTaggerFeatureGenerator.java   |  62 +++++++
 .../tools/postag/pos-default-features.xml       |  38 ++++
 .../ConfigurablePOSContextGeneratorTest.java    |  55 ++++++
 .../tools/postag/DummyPOSTaggerFactory.java     |  14 +-
 .../tools/postag/POSTaggerFactoryTest.java      |  11 +-
 15 files changed, 534 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
index 5bb18d2..4fb8cb9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
@@ -67,7 +67,7 @@ public final class TokenNameFinderTrainerTool
     return null;
   }
 
-  static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
+  public static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
     byte[] featureGeneratorBytes = null;
     // load descriptor file into memory
     if (featureGenDescriptorFile != null) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
index d91d4ee..67ad2b9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java
@@ -22,10 +22,12 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.Map;
 
 import opennlp.tools.cmdline.AbstractCrossValidatorTool;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
 import opennlp.tools.cmdline.params.CVParams;
 import opennlp.tools.cmdline.params.FineGrainedEvaluatorParams;
 import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool.CVToolParams;
@@ -75,10 +77,16 @@ public final class POSTaggerCrossValidatorTool
       }
     }
 
+    Map<String, Object> resources = TokenNameFinderTrainerTool.loadResources(
+        params.getResources(), params.getFeaturegen());
+
+    byte[] featureGeneratorBytes =
+        TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
+
     POSTaggerCrossValidator validator;
     try {
       validator = new POSTaggerCrossValidator(params.getLang(), mlParams,
-          params.getDict(), params.getNgram(), params.getTagDictCutoff(),
+          params.getDict(), featureGeneratorBytes, resources, params.getTagDictCutoff(),
           params.getFactory(), missclassifiedListener, reportListener);
 
       validator.evaluate(sampleStream, params.getFolds());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
index 1e6fb54..b922176 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
@@ -19,13 +19,14 @@ package opennlp.tools.cmdline.postag;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Map;
 
 import opennlp.tools.cmdline.AbstractTrainerTool;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
 import opennlp.tools.cmdline.params.TrainingToolParams;
 import opennlp.tools.cmdline.postag.POSTaggerTrainerTool.TrainerToolParams;
-import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.postag.MutableTagDictionary;
 import opennlp.tools.postag.POSModel;
@@ -66,25 +67,16 @@ public final class POSTaggerTrainerTool
     File modelOutFile = params.getModel();
     CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);
 
-    Dictionary ngramDict = null;
+    Map<String, Object> resources = TokenNameFinderTrainerTool.loadResources(
+        params.getResources(), params.getFeaturegen());
 
-    Integer ngramCutoff = params.getNgram();
-
-    if (ngramCutoff != null) {
-      System.err.print("Building ngram dictionary ... ");
-      try {
-        ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff);
-        sampleStream.reset();
-      } catch (IOException e) {
-        throw new TerminateToolException(-1,
-            "IO error while building NGram Dictionary: " + e.getMessage(), e);
-      }
-      System.err.println("done");
-    }
+    byte[] featureGeneratorBytes =
+        TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
 
     POSTaggerFactory postaggerFactory;
     try {
-      postaggerFactory = POSTaggerFactory.create(params.getFactory(), ngramDict, null);
+      postaggerFactory = POSTaggerFactory.create(params.getFactory(), featureGeneratorBytes,
+          resources, null);
     } catch (InvalidFormatException e) {
       throw new TerminateToolException(-1, e.getMessage(), e);
     }
@@ -95,7 +87,7 @@ public final class POSTaggerTrainerTool
             .createTagDictionary(params.getDict()));
       } catch (IOException e) {
         throw new TerminateToolException(-1,
-            "IO error while loading POS Dictionary: " + e.getMessage(), e);
+            "IO error while loading POS Dictionary", e);
       }
     }
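
The trainer tool now feeds a feature generator descriptor and a resources directory into the POSTaggerFactory instead of building an ngram dictionary. A minimal sketch of that wiring, assuming that passing null for the factory class name and the tag dictionary selects the defaults; the class and method names are illustrative:

import java.io.File;
import java.util.Map;

import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.util.InvalidFormatException;

class PosFactoryWiringSketch {

  // Mirrors the wiring above: load the descriptor bytes and resources, then
  // build a POSTaggerFactory that uses them for feature generation.
  static POSTaggerFactory createFactory(File featureGenDescriptor, File resourcesDir)
      throws InvalidFormatException {
    byte[] featureGeneratorBytes =
        TokenNameFinderTrainerTool.openFeatureGeneratorBytes(featureGenDescriptor);

    Map<String, Object> resources =
        TokenNameFinderTrainerTool.loadResources(resourcesDir, featureGenDescriptor);

    // null factory class name and null tag dictionary select the defaults (assumption).
    return POSTaggerFactory.create(null, featureGeneratorBytes, resources, null);
  }
}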
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
index 690b359..31d5e48 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
@@ -29,14 +29,17 @@ import opennlp.tools.cmdline.params.BasicTrainingParams;
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {
-  @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
+  @ParameterDescription(valueName = "featuregenFile", description = "The feature generator descriptor file")
   @OptionalParameter
-  File getDict();
+  File getFeaturegen();
+
+  @ParameterDescription(valueName = "resourcesDir", description = "The resources directory")
+  @OptionalParameter
+  File getResources();
 
-  @ParameterDescription(valueName = "cutoff",
-      description = "NGram cutoff. If not specified will not create ngram dictionary.")
+  @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
   @OptionalParameter
-  Integer getNgram();
+  File getDict();
 
   @ParameterDescription(valueName = "tagDictCutoff",
       description = "TagDictionary cutoff. If specified will create/expand a mutable TagDictionary")

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
new file mode 100644
index 0000000..e6b65df
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/ConfigurablePOSContextGenerator.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import opennlp.tools.util.Cache;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+
+/**
+ * A context generator for the POS Tagger.
+ */
+public class ConfigurablePOSContextGenerator implements POSContextGenerator {
+
+  private Cache<String, String[]> contextsCache;
+  private Object wordsKey;
+
+  private final AdaptiveFeatureGenerator featureGenerator;
+
+  /**
+   * Initializes the current instance.
+   *
+   * @param cacheSize
+   */
+  public ConfigurablePOSContextGenerator(int cacheSize, AdaptiveFeatureGenerator featureGenerator) {
+    this.featureGenerator = Objects.requireNonNull(featureGenerator, "featureGenerator must not be null");
+
+    if (cacheSize > 0) {
+      contextsCache = new Cache<>(cacheSize);
+    }
+  }
+
+  /**
+   * Initializes the current instance.
+   *
+   */
+  public ConfigurablePOSContextGenerator(AdaptiveFeatureGenerator featureGenerator) {
+    this(0, featureGenerator);
+  }
+
+  /**
+   * Returns the context for making a pos tag decision at the specified token index
+   * given the specified tokens and previous tags.
+   * @param index The index of the token for which the context is provided.
+   * @param tokens The tokens in the sentence.
+   * @param tags The tags assigned to the previous words in the sentence.
+   * @return The context for making a pos tag decision at the specified token index
+   *     given the specified tokens and previous tags.
+   */
+  public String[] getContext(int index, String[] tokens, String[] tags,
+      Object[] additionalContext) {
+
+    String tagprev = null;
+    String tagprevprev = null;
+
+    if (index - 1 >= 0) {
+      tagprev =  tags[index - 1];
+
+      if (index - 2 >= 0) {
+        tagprevprev = tags[index - 2];
+      }
+    }
+
+    String cacheKey = index + tagprev + tagprevprev;
+    if (contextsCache != null) {
+      if (wordsKey == tokens) {
+        String[] cachedContexts = contextsCache.get(cacheKey);
+        if (cachedContexts != null) {
+          return cachedContexts;
+        }
+      }
+      else {
+        contextsCache.clear();
+        wordsKey = tokens;
+      }
+    }
+
+    List<String> e = new ArrayList<>();
+
+    featureGenerator.createFeatures(e, tokens, index, tags);
+
+    String[] contexts = e.toArray(new String[e.size()]);
+    if (contextsCache != null) {
+      contextsCache.put(cacheKey, contexts);
+    }
+    return contexts;
+  }
+}
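
The new context generator delegates all feature extraction to the supplied AdaptiveFeatureGenerator. A minimal usage sketch; it assumes the PosTaggerFeatureGenerator listed in this commit lives in opennlp.tools.util.featuregen and has a no-argument constructor, neither of which is shown here:

import opennlp.tools.postag.ConfigurablePOSContextGenerator;
import opennlp.tools.util.featuregen.PosTaggerFeatureGenerator;

class ConfigurablePOSContextSketch {

  public static void main(String[] args) {
    // Cache up to 100 contexts per token array; all features come from the
    // supplied feature generator (here the new PosTaggerFeatureGenerator).
    ConfigurablePOSContextGenerator cg =
        new ConfigurablePOSContextGenerator(100, new PosTaggerFeatureGenerator());

    String[] tokens = {"The", "quick", "brown", "fox"};
    String[] tags   = {"DT", "JJ", "JJ", "NN"};

    // Feature strings for the token at index 3, given the preceding tags.
    String[] context = cg.getContext(3, tokens, tags, null);

    for (String feature : context) {
      System.out.println(feature);
    }
  }
}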

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
index 5f5eb25..90d51c1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
@@ -32,12 +32,13 @@ import opennlp.tools.dictionary.serializer.Entry;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.StringList;
 import opennlp.tools.util.StringUtil;
+import opennlp.tools.util.model.SerializableArtifact;
 
 /**
  * Provides a means of determining which tags are valid for a particular word
  * based on a tag dictionary read from a file.
  */
-public class POSDictionary implements Iterable<String>, MutableTagDictionary {
+public class POSDictionary implements Iterable<String>, MutableTagDictionary, SerializableArtifact {
 
   private Map<String, String[]> dictionary;
 
@@ -265,4 +266,9 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary {
   public boolean isCaseSensitive() {
     return this.caseSensitive;
   }
+
+  @Override
+  public Class<?> getArtifactSerializerClass() {
+    return POSTaggerFactory.POSDictionarySerializer.class;
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
index bfe5c90..f81092b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Properties;
 
 import opennlp.tools.dictionary.Dictionary;
@@ -32,6 +33,7 @@ import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 /**
  * The {@link POSModel} is the model used
@@ -42,18 +44,23 @@ import opennlp.tools.util.model.BaseModel;
 public final class POSModel extends BaseModel {
 
   private static final String COMPONENT_NAME = "POSTaggerME";
-
   static final String POS_MODEL_ENTRY_NAME = "pos.model";
+  static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
 
   public POSModel(String languageCode, SequenceClassificationModel<String> posModel,
       Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) {
 
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
 
-    if (posModel == null)
-        throw new IllegalArgumentException("The maxentPosModel param must not be null!");
+    artifactMap.put(POS_MODEL_ENTRY_NAME,
+        Objects.requireNonNull(posModel, "posModel must not be null"));
+
+    artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, posFactory.getFeatureGenerator());
+
+    for (Map.Entry<String, Object> resource : posFactory.getResources().entrySet()) {
+      artifactMap.put(resource.getKey(), resource.getValue());
+    }
 
-    artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
     // TODO: This fails probably for the sequence model ... ?!
     // checkArtifactMap();
   }
@@ -68,13 +75,18 @@ public final class POSModel extends BaseModel {
 
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, posFactory);
 
-    if (posModel == null)
-        throw new IllegalArgumentException("The maxentPosModel param must not be null!");
+    Objects.requireNonNull(posModel, "posModel must not be null");
 
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
     manifest.setProperty(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
 
     artifactMap.put(POS_MODEL_ENTRY_NAME, posModel);
+    artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, posFactory.getFeatureGenerator());
+
+    for (Map.Entry<String, Object> resource : posFactory.getResources().entrySet()) {
+      artifactMap.put(resource.getKey(), resource.getValue());
+    }
+
     checkArtifactMap();
   }
 
@@ -96,14 +108,6 @@ public final class POSModel extends BaseModel {
   }
 
   @Override
-  @SuppressWarnings("rawtypes")
-  protected void createArtifactSerializers(
-      Map<String, ArtifactSerializer> serializers) {
-
-    super.createArtifactSerializers(serializers);
-  }
-
-  @Override
   protected void validateArtifactMap() throws InvalidFormatException {
     super.validateArtifactMap();
 
@@ -114,6 +118,7 @@ public final class POSModel extends BaseModel {
 
   /**
    * @deprecated use getPosSequenceModel instead. This method will be removed soon.
+   * Only required for Parser 1.5.x backward compatibility. Newer models don't need this anymore.
    */
   @Deprecated
   public MaxentModel getPosModel() {
@@ -151,6 +156,13 @@ public final class POSModel extends BaseModel {
     return (POSTaggerFactory) this.toolFactory;
   }
 
+  @Override
+  protected void createArtifactSerializers(Map<String, ArtifactSerializer> serializers) {
+    super.createArtifactSerializers(serializers);
+
+    serializers.put("featuregen", new ByteArraySerializer());
+  }
+
   /**
    * Retrieves the ngram dictionary.
    *

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
index 3010e03..a35bbb6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java
@@ -19,6 +19,7 @@ package opennlp.tools.postag;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Map;
 
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.util.ObjectStream;
@@ -32,7 +33,8 @@ public class POSTaggerCrossValidator {
 
   private final TrainingParameters params;
 
-  private Integer ngramCutoff;
+  private byte[] featureGeneratorBytes;
+  private Map<String, Object> resources;
 
   private Mean wordAccuracy = new Mean();
   private POSTaggerEvaluationMonitor[] listeners;
@@ -51,18 +53,21 @@ public class POSTaggerCrossValidator {
    * the tag and the ngram dictionaries.
    */
   public POSTaggerCrossValidator(String languageCode,
-      TrainingParameters trainParam, File tagDictionary,
-      Integer ngramCutoff, Integer tagdicCutoff, String factoryClass,
-      POSTaggerEvaluationMonitor... listeners) {
+                                 TrainingParameters trainParam, File tagDictionary,
+                                 byte[] featureGeneratorBytes, Map<String, Object> resources,
+                                 Integer tagdicCutoff, String factoryClass,
+                                 POSTaggerEvaluationMonitor... listeners) {
     this.languageCode = languageCode;
     this.params = trainParam;
-    this.ngramCutoff = ngramCutoff;
+    this.featureGeneratorBytes = featureGeneratorBytes;
+    this.resources = resources;
     this.listeners = listeners;
     this.factoryClassName = factoryClass;
     this.tagdicCutoff = tagdicCutoff;
     this.tagDictionaryFile = tagDictionary;
   }
 
+
   /**
    * Creates a {@link POSTaggerCrossValidator} using the given
    * {@link POSTaggerFactory}.
@@ -74,7 +79,6 @@ public class POSTaggerCrossValidator {
     this.params = trainParam;
     this.listeners = listeners;
     this.factory = factory;
-    this.ngramCutoff = null;
     this.tagdicCutoff = null;
   }
 
@@ -98,33 +102,18 @@ public class POSTaggerCrossValidator {
       CrossValidationPartitioner.TrainingSampleStream<POSSample> trainingSampleStream = partitioner
           .next();
 
-      if (this.factory == null) {
-        this.factory = POSTaggerFactory.create(this.factoryClassName, null,
-            null);
-      }
-
-      Dictionary ngramDict = this.factory.getDictionary();
-      if (ngramDict == null) {
-        if (this.ngramCutoff != null) {
-          System.err.print("Building ngram dictionary ... ");
-          ngramDict = POSTaggerME.buildNGramDictionary(trainingSampleStream,
-              this.ngramCutoff);
-          trainingSampleStream.reset();
-          System.err.println("done");
-        }
-        this.factory.setDictionary(ngramDict);
-      }
 
       if (this.tagDictionaryFile != null
           && this.factory.getTagDictionary() == null) {
         this.factory.setTagDictionary(this.factory
             .createTagDictionary(tagDictionaryFile));
       }
+
+      TagDictionary dict = null;
       if (this.tagdicCutoff != null) {
-        TagDictionary dict = this.factory.getTagDictionary();
+        dict = this.factory.getTagDictionary();
         if (dict == null) {
           dict = this.factory.createEmptyTagDictionary();
-          this.factory.setTagDictionary(dict);
         }
         if (dict instanceof MutableTagDictionary) {
           POSTaggerME.populatePOSDictionary(trainingSampleStream, (MutableTagDictionary)dict,
@@ -136,6 +125,12 @@ public class POSTaggerCrossValidator {
         trainingSampleStream.reset();
       }
 
+      if (this.factory == null) {
+        this.factory = POSTaggerFactory.create(this.factoryClassName, null, null);
+      }
+
+      factory.init(featureGeneratorBytes, resources, dict);
+
       POSModel model = POSTaggerME.train(languageCode, trainingSampleStream,
           params, this.factory);
 
@@ -148,7 +143,6 @@ public class POSTaggerCrossValidator {
       if (this.tagdicCutoff != null) {
         this.factory.setTagDictionary(null);
       }
-
     }
   }
 

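The cross validator now takes the serialized feature generator descriptor and a resource map instead of an ngram cutoff. A sketch of how it might be driven with the new constructor, assuming the usual evaluate(samples, nFolds) and getWordAccuracy() entry points of this class; the descriptor file name and the sample stream are illustrative assumptions:

  import java.io.File;
  import java.nio.file.Files;
  import java.nio.file.Paths;
  import java.util.Collections;

  import opennlp.tools.postag.POSSample;
  import opennlp.tools.postag.POSTaggerCrossValidator;
  import opennlp.tools.util.ObjectStream;
  import opennlp.tools.util.model.ModelUtil;

  public class PosCrossValidationSketch {

    // `samples` is assumed to be a stream of POSSample training data.
    static double run(ObjectStream<POSSample> samples) throws Exception {
      // Hypothetical descriptor file; passing null instead selects the
      // built-in pos-default-features.xml descriptor.
      byte[] featureGenBytes = Files.readAllBytes(Paths.get("pos-features.xml"));

      POSTaggerCrossValidator cv = new POSTaggerCrossValidator("en",
          ModelUtil.createDefaultTrainingParameters(), (File) null,
          featureGenBytes, Collections.emptyMap(), (Integer) null, (String) null);

      cv.evaluate(samples, 10);
      return cv.getWordAccuracy();
    }
  }
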
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index eb5466e..37143c9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -17,6 +17,8 @@
 
 package opennlp.tools.postag;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -29,10 +31,15 @@ import java.util.Set;
 
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.Version;
 import opennlp.tools.util.ext.ExtensionLoader;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
+import opennlp.tools.util.featuregen.GeneratorFactory;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.UncloseableInputStream;
 
@@ -44,7 +51,10 @@ public class POSTaggerFactory extends BaseToolFactory {
   private static final String TAG_DICTIONARY_ENTRY_NAME = "tags.tagdict";
   private static final String NGRAM_DICTIONARY_ENTRY_NAME = "ngram.dictionary";
 
+
   protected Dictionary ngramDictionary;
+  private byte[] featureGeneratorBytes;
+  private Map<String, Object> resources;
   protected TagDictionary posDictionary;
 
   /**
@@ -60,23 +70,127 @@ public class POSTaggerFactory extends BaseToolFactory {
    *
    * @param ngramDictionary
    * @param posDictionary
+   *
+   * @deprecated this constructor exists only for backward compatibility and
+   *             no longer has any effect when training 1.8.x series models
    */
-  public POSTaggerFactory(Dictionary ngramDictionary,
-      TagDictionary posDictionary) {
+  @Deprecated
+  public POSTaggerFactory(Dictionary ngramDictionary, TagDictionary posDictionary) {
     this.init(ngramDictionary, posDictionary);
+
+    // TODO: This could be made functional by creating some default feature generation
+    // which uses the dictionary ...
+  }
+
+  public POSTaggerFactory(byte[] featureGeneratorBytes, final Map<String, Object> resources,
+                          TagDictionary posDictionary) {
+    this.featureGeneratorBytes = featureGeneratorBytes;
+
+    if (this.featureGeneratorBytes == null) {
+      this.featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+    }
+
+    this.resources = resources;
+    this.posDictionary = posDictionary;
   }
 
+  @Deprecated // will be removed when only 1.8.x series models are supported
   protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) {
     this.ngramDictionary = ngramDictionary;
     this.posDictionary = posDictionary;
   }
 
+  protected void init(byte[] featureGeneratorBytes, final Map<String, Object> resources,
+                      TagDictionary posDictionary) {
+    this.featureGeneratorBytes = featureGeneratorBytes;
+    this.resources = resources;
+    this.posDictionary = posDictionary;
+  }
+
+  private static byte[] loadDefaultFeatureGeneratorBytes() {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
+        "/opennlp/tools/postag/pos-default-features.xml")) {
+
+      if (in == null) {
+        throw new IllegalStateException("Classpath must contain pos-default-features.xml file!");
+      }
+
+      byte[] buf = new byte[1024];
+      int len;
+      while ((len = in.read(buf)) > 0) {
+        bytes.write(buf, 0, len);
+      }
+    }
+    catch (IOException e) {
+      throw new IllegalStateException("Failed reading from pos-default-features.xml file on classpath!", e);
+    }
+
+    return bytes.toByteArray();
+  }
+
+  /**
+   * Creates the {@link AdaptiveFeatureGenerator}. Usually this
+   * is a set of generators contained in the {@link AggregatedFeatureGenerator}.
+   *
+   * Note:
+   * The generators are created on every call to this method.
+   *
+   * @return the feature generator created from the model's descriptor, or from
+   *         the default descriptor if the model contains none
+   */
+  public AdaptiveFeatureGenerator createFeatureGenerators() {
+
+    if (featureGeneratorBytes == null && artifactProvider != null) {
+      featureGeneratorBytes = artifactProvider.getArtifact(
+          POSModel.GENERATOR_DESCRIPTOR_ENTRY_NAME);
+    }
+
+    if (featureGeneratorBytes == null) {
+      featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+    }
+
+    InputStream descriptorIn = new ByteArrayInputStream(featureGeneratorBytes);
+
+    AdaptiveFeatureGenerator generator;
+    try {
+      generator = GeneratorFactory.create(descriptorIn, key -> {
+        if (artifactProvider != null) {
+          return artifactProvider.getArtifact(key);
+        }
+        else {
+          return resources.get(key);
+        }
+      });
+    } catch (InvalidFormatException e) {
+      // It is assumed that creating the feature generators cannot fail here
+      // after it already succeeded once during model loading. If it fails
+      // anyway, that can only be caused by a programming mistake, so the
+      // caller is not forced to handle a checked exception and a runtime
+      // exception is thrown instead.
+      throw new IllegalStateException("Failed to create the feature generators from the descriptor!", e);
+    } catch (IOException e) {
+      throw new IllegalStateException("Reading from memory cannot result in an I/O error", e);
+    }
+
+    return generator;
+  }
+
   @Override
   @SuppressWarnings("rawtypes")
   public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
     Map<String, ArtifactSerializer> serializers = super.createArtifactSerializersMap();
-    POSDictionarySerializer.register(serializers);
-    // the ngram Dictionary uses a base serializer, we don't need to add it here.
+
+    // NOTE: This is only needed for old models; this check can be removed once support for them is dropped
+    if (Version.currentVersion().getMinor() < 8) {
+      POSDictionarySerializer.register(serializers);
+    }
+
     return serializers;
   }
 
@@ -111,18 +225,37 @@ public class POSTaggerFactory extends BaseToolFactory {
     this.posDictionary = dictionary;
   }
 
+  protected Map<String, Object> getResources() {
+    if (resources != null) {
+      return resources;
+    }
+
+    return Collections.emptyMap();
+  }
+
+  protected byte[] getFeatureGenerator() {
+    return featureGeneratorBytes;
+  }
+
   public TagDictionary getTagDictionary() {
     if (this.posDictionary == null && artifactProvider != null)
       this.posDictionary = artifactProvider.getArtifact(TAG_DICTIONARY_ENTRY_NAME);
     return this.posDictionary;
   }
 
+  /**
+   * @deprecated this will be reduced in visibility and later removed
+   */
+  @Deprecated
   public Dictionary getDictionary() {
     if (this.ngramDictionary == null && artifactProvider != null)
       this.ngramDictionary = artifactProvider.getArtifact(NGRAM_DICTIONARY_ENTRY_NAME);
     return this.ngramDictionary;
   }
 
+  @Deprecated
   public void setDictionary(Dictionary ngramDict) {
     if (artifactProvider != null) {
       throw new IllegalStateException(
@@ -132,10 +265,14 @@ public class POSTaggerFactory extends BaseToolFactory {
   }
 
   public POSContextGenerator getPOSContextGenerator() {
-    return new DefaultPOSContextGenerator(0, getDictionary());
+    return getPOSContextGenerator(0);
   }
 
   public POSContextGenerator getPOSContextGenerator(int cacheSize) {
+    if (Version.currentVersion().getMinor() >= 8) {
+      return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators());
+    }
+
     return new DefaultPOSContextGenerator(cacheSize, getDictionary());
   }
 
@@ -143,7 +280,9 @@ public class POSTaggerFactory extends BaseToolFactory {
     return new DefaultPOSSequenceValidator(getTagDictionary());
   }
 
-  static class POSDictionarySerializer implements ArtifactSerializer<POSDictionary> {
+  // TODO: This should not be done anymore for 1.8.x models; they can just
+  // use the SerializableArtifact interface
+  public static class POSDictionarySerializer implements ArtifactSerializer<POSDictionary> {
 
     public POSDictionary create(InputStream in) throws IOException {
       return POSDictionary.create(new UncloseableInputStream(in));
@@ -218,6 +357,7 @@ public class POSTaggerFactory extends BaseToolFactory {
 
   }
 
+  @Deprecated
   public static POSTaggerFactory create(String subclassName,
       Dictionary ngramDictionary, TagDictionary posDictionary)
       throws InvalidFormatException {
@@ -233,11 +373,34 @@ public class POSTaggerFactory extends BaseToolFactory {
     } catch (Exception e) {
       String msg = "Could not instantiate the " + subclassName
          + ". The initialization threw an exception.";
-      System.err.println(msg);
-      e.printStackTrace();
       throw new InvalidFormatException(msg, e);
     }
+  }
+
+  public static POSTaggerFactory create(String subclassName, byte[] featureGeneratorBytes,
+                                        Map<String, Object> resources, TagDictionary posDictionary)
+      throws InvalidFormatException {
+
+    POSTaggerFactory theFactory;
+
+    if (subclassName == null) {
+      // will create the default factory
+      theFactory = new POSTaggerFactory(null, posDictionary);
+    }
+    else {
+      try {
+        theFactory = ExtensionLoader.instantiateExtension(
+            POSTaggerFactory.class, subclassName);
+      } catch (Exception e) {
+        String msg = "Could not instantiate the " + subclassName
+            + ". The initialization threw an exception.";
+        throw new InvalidFormatException(msg, e);
+      }
+    }
+
+    theFactory.init(featureGeneratorBytes, resources, posDictionary);
 
+    return theFactory;
   }
 
   public TagDictionary createEmptyTagDictionary() {

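Putting the factory changes together, training a tagger with the new descriptor-based feature generation might look roughly like the sketch below; the helper class and the sample stream are assumptions for illustration, and passing a null descriptor falls back to the bundled default shown further down:

  import java.util.Collections;

  import opennlp.tools.postag.POSModel;
  import opennlp.tools.postag.POSSample;
  import opennlp.tools.postag.POSTaggerFactory;
  import opennlp.tools.postag.POSTaggerME;
  import opennlp.tools.util.ObjectStream;
  import opennlp.tools.util.model.ModelUtil;

  public class PosTrainingSketch {

    // `samples` is assumed to be provided by the caller, e.g. a WordTagSampleStream.
    static POSModel train(ObjectStream<POSSample> samples) throws Exception {
      // A null descriptor makes createFeatureGenerators() fall back to the
      // bundled pos-default-features.xml descriptor.
      POSTaggerFactory factory = new POSTaggerFactory(null, Collections.emptyMap(), null);

      return POSTaggerME.train("en", samples, ModelUtil.createDefaultTrainingParameters(), factory);
    }
  }
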
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index ef08cfb..a1ac72b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -489,6 +489,17 @@ public class GeneratorFactory {
     }
   }
 
+  static class PosTaggerFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
+    public AdaptiveFeatureGenerator create(Element generatorElement,
+                                           FeatureGeneratorResourceProvider resourceManager) {
+      return new PosTaggerFeatureGenerator();
+    }
+
+    static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
+      factoryMap.put("postagger", new PosTaggerFeatureGeneratorFactory());
+    }
+  }
+
   /**
    * @see WindowFeatureGenerator
    */
@@ -658,6 +669,7 @@ public class GeneratorFactory {
     TokenFeatureGeneratorFactory.register(factories);
     BigramNameFeatureGeneratorFactory.register(factories);
     TokenPatternFeatureGeneratorFactory.register(factories);
+    PosTaggerFeatureGeneratorFactory.register(factories);
     PrefixFeatureGeneratorFactory.register(factories);
     SuffixFeatureGeneratorFactory.register(factories);
     WindowFeatureGeneratorFactory.register(factories);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
new file mode 100644
index 0000000..c32baec
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGenerator.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.List;
+
+public class PosTaggerFeatureGenerator implements AdaptiveFeatureGenerator {
+
+  private final String SB = "S=begin";
+
+  @Override
+  public void createFeatures(List<String> features, String[] tokens, int index,
+                             String[] tags) {
+
+    String prev, prevprev = null;
+    String tagprev, tagprevprev;
+    tagprev = tagprevprev = null;
+
+    if (index - 1 >= 0) {
+      prev =  tokens[index - 1];
+      tagprev =  tags[index - 1];
+
+      if (index - 2 >= 0) {
+        prevprev = tokens[index - 2];
+        tagprevprev = tags[index - 2];
+      }
+      else {
+        prevprev = SB;
+      }
+    }
+    else {
+      prev = SB;
+    }
+
+    // add the words and pos's of the surrounding context
+    if (prev != null) {
+      if (tagprev != null) {
+        features.add("t=" + tagprev);
+      }
+      if (prevprev != null) {
+        if (tagprevprev != null) {
+          features.add("t2=" + tagprevprev + "," + tagprev);
+        }
+      }
+    }
+  }
+}

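The generator above only emits the tag of the previous token and the tag bigram of the two previous tokens. A small, hypothetical illustration of the feature strings it produces (the tokens and tags below are made up):

  import java.util.ArrayList;
  import java.util.List;

  import opennlp.tools.util.featuregen.PosTaggerFeatureGenerator;

  public class PosTaggerFeatureGeneratorExample {

    public static void main(String[] args) {
      String[] tokens = {"The", "quick", "fox"};
      String[] tags   = {"DT", "JJ", "NN"};

      List<String> features = new ArrayList<>();
      // Generate features for the token at index 2 ("fox").
      new PosTaggerFeatureGenerator().createFeatures(features, tokens, 2, tags);

      // Prints: [t=JJ, t2=DT,JJ]
      System.out.println(features);
    }
  }
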
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
new file mode 100644
index 0000000..0be1fc8
--- /dev/null
+++ b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -0,0 +1,38 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+
+
+<!-- Default pos tagger feature generator configuration -->
+<generators>
+    <cache>
+        <generators>
+            <definition/>
+            <suffix/>
+            <prefix/>
+            <window prevLength = "2" nextLength = "2">
+                <token/>
+            </window>
+            <window prevLength = "2" nextLength = "2">
+                <sentence begin="true" end="false"/>
+            </window>
+            <tokenclass/>
+            <postagger/>
+        </generators>
+    </cache>
+</generators>

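The descriptor above is parsed by GeneratorFactory, in the same way createFeatureGenerators() does it in POSTaggerFactory. A minimal sketch of loading it directly from the classpath; since the default descriptor references no external resources, the resource provider here is assumed to simply return null:

  import java.io.IOException;
  import java.io.InputStream;

  import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
  import opennlp.tools.util.featuregen.GeneratorFactory;

  public class DefaultPosDescriptorSketch {

    static AdaptiveFeatureGenerator load() throws IOException {
      try (InputStream in = DefaultPosDescriptorSketch.class.getResourceAsStream(
          "/opennlp/tools/postag/pos-default-features.xml")) {
        // The default descriptor does not reference external resources,
        // so the lookup can return null for every key.
        return GeneratorFactory.create(in, key -> null);
      }
    }
  }
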
http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
new file mode 100644
index 0000000..f00e855
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/ConfigurablePOSContextGeneratorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+import opennlp.tools.util.featuregen.TokenFeatureGenerator;
+
+public class ConfigurablePOSContextGeneratorTest {
+
+  private void testContextGeneration(int cacheSize) {
+    AdaptiveFeatureGenerator fg = new TokenFeatureGenerator();
+    ConfigurablePOSContextGenerator cg = new ConfigurablePOSContextGenerator(cacheSize, fg);
+
+    String[] tokens = new String[] {"a", "b", "c", "d", "e"};
+    String[] tags = new String[] {"t_a", "t_b", "t_c", "t_d", "t_e"};
+
+    cg.getContext(0, tokens, tags, null);
+
+    Assert.assertEquals(1, cg.getContext(0, tokens, tags, null).length);
+    Assert.assertEquals("w=a", cg.getContext(0, tokens, tags, null)[0]);
+    Assert.assertEquals("w=b", cg.getContext(1, tokens, tags, null)[0]);
+    Assert.assertEquals("w=c", cg.getContext(2, tokens, tags, null)[0]);
+    Assert.assertEquals("w=d", cg.getContext(3, tokens, tags, null)[0]);
+    Assert.assertEquals("w=e", cg.getContext(4, tokens, tags, null)[0]);
+  }
+
+  @Test
+  public void testWithoutCache() {
+    testContextGeneration(0);
+  }
+
+  @Test
+  public void testWithCache() {
+    testContextGeneration(3);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
index e0ce2a6..91228fc 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
@@ -36,8 +36,8 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
   public DummyPOSTaggerFactory() {
   }
 
-  public DummyPOSTaggerFactory(Dictionary ngramDictionary, DummyPOSDictionary posDictionary) {
-    super(ngramDictionary, null);
+  public DummyPOSTaggerFactory(DummyPOSDictionary posDictionary) {
+    super(null, null, null);
     this.dict = posDictionary;
   }
 
@@ -81,7 +81,7 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
 
   }
 
-  static class DummyPOSDictionarySerializer implements ArtifactSerializer<DummyPOSDictionary> {
+  public static class DummyPOSDictionarySerializer implements ArtifactSerializer<DummyPOSDictionary> {
 
     public DummyPOSDictionary create(InputStream in) throws IOException {
       return DummyPOSDictionary.create(new UncloseableInputStream(in));
@@ -106,6 +106,9 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
 
     private POSDictionary dict;
 
+    public DummyPOSDictionary() {
+    }
+
     public DummyPOSDictionary(POSDictionary dict) {
       this.dict = dict;
     }
@@ -123,6 +126,9 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
       return dict.getTags(word);
     }
 
+    @Override
+    public Class<?> getArtifactSerializerClass() {
+      return DummyPOSDictionarySerializer.class;
+    }
   }
-
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/dd39d066/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
index edb20b3..b98d3bf 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
@@ -25,7 +25,6 @@ import java.nio.charset.StandardCharsets;
 import org.junit.Assert;
 import org.junit.Test;
 
-import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.formats.ResourceAsStreamFactory;
 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator;
 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSDictionary;
@@ -62,9 +61,8 @@ public class POSTaggerFactoryTest {
     DummyPOSDictionary posDict = new DummyPOSDictionary(
         POSDictionary.create(POSDictionaryTest.class
             .getResourceAsStream("TagDictionaryCaseSensitive.xml")));
-    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
 
-    POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(dic, posDict));
+    POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(posDict));
 
     POSTaggerFactory factory = posModel.getFactory();
     Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
@@ -81,22 +79,18 @@ public class POSTaggerFactoryTest {
     Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
     Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
     Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
-    Assert.assertTrue(factory.getDictionary() != null);
   }
 
   @Test
   public void testPOSTaggerWithDefaultFactory() throws IOException {
     POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
             .getResourceAsStream("TagDictionaryCaseSensitive.xml"));
-    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);
-
-    POSModel posModel = trainPOSModel(new POSTaggerFactory(dic, posDict));
+    POSModel posModel = trainPOSModel(new POSTaggerFactory(null, null, posDict));
 
     POSTaggerFactory factory = posModel.getFactory();
     Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
     Assert.assertTrue(factory.getPOSContextGenerator() != null);
     Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
-    Assert.assertTrue(factory.getDictionary() != null);
 
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     posModel.serialize(out);
@@ -108,7 +102,6 @@ public class POSTaggerFactoryTest {
     Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
     Assert.assertTrue(factory.getPOSContextGenerator() != null);
     Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
-    Assert.assertTrue(factory.getDictionary() != null);
   }
 
   @Test(expected = InvalidFormatException.class)


[37/50] [abbrv] opennlp git commit: OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator

Posted by jo...@apache.org.
OPENNLP-1000: Add a test case for the BilouNameFinderSequenceValidator

This closes #139


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5c9f6ab3
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5c9f6ab3
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5c9f6ab3

Branch: refs/heads/parser_regression
Commit: 5c9f6ab305ae806580f720fb738dac125e42e0b5
Parents: 17493d1
Author: Peter Thygesen <pe...@gmail.com>
Authored: Mon Mar 13 00:49:49 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 .../BilouNameFinderSequenceValidator.java       |  23 +-
 .../BilouNameFinderSequenceValidatorTest.java   | 435 +++++++++++++++++++
 2 files changed, 449 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
index 6e73504..19700fb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java
@@ -23,19 +23,22 @@ public class BilouNameFinderSequenceValidator implements
     SequenceValidator<String> {
 
   public boolean validSequence(int i, String[] inputSequence,
-      String[] outcomesSequence, String outcome) {
+                               String[] outcomesSequence, String outcome) {
 
-    if (outcome.endsWith(NameFinderME.CONTINUE) || outcome.endsWith(BilouCodec.LAST)) {
+    if (outcome.endsWith(BilouCodec.CONTINUE) || outcome.endsWith(BilouCodec.LAST)) {
 
       int li = outcomesSequence.length - 1;
 
       if (li == -1) {
         return false;
-      } else if (outcomesSequence[li].endsWith(NameFinderME.OTHER) ||
+      } else if (outcomesSequence[li].endsWith(BilouCodec.OTHER) ||
           outcomesSequence[li].endsWith(BilouCodec.UNIT)) {
         return false;
-      } else if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE) ||
-          outcomesSequence[li].endsWith(NameFinderME.START)) {
+      } else if (outcomesSequence[li].endsWith(BilouCodec.LAST) &&
+          (outcome.endsWith(BilouCodec.CONTINUE) || outcome.endsWith(BilouCodec.LAST))) {
+        return false;
+      } else if (outcomesSequence[li].endsWith(BilouCodec.CONTINUE) ||
+          outcomesSequence[li].endsWith(BilouCodec.START)) {
         // if it is continue, we have to check if previous match was of the same type
         String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
         String nameType = NameFinderME.extractNameType(outcome);
@@ -50,10 +53,12 @@ public class BilouNameFinderSequenceValidator implements
       }
     }
 
-    if (outcomesSequence.length - 1 > 0) {
-      if (outcome.endsWith(NameFinderME.OTHER)) {
-        if (outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.START)
-            || outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.CONTINUE)) {
+    if (outcomesSequence.length > 0) {
+      if (outcome.endsWith(BilouCodec.START)
+          || outcome.endsWith(BilouCodec.OTHER)
+          || outcome.endsWith(BilouCodec.UNIT)) {
+        if (outcomesSequence[outcomesSequence.length - 1].endsWith(BilouCodec.START)
+            || outcomesSequence[outcomesSequence.length - 1].endsWith(BilouCodec.CONTINUE)) {
           return false;
         }
       }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5c9f6ab3/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
new file mode 100644
index 0000000..a234beb
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouNameFinderSequenceValidatorTest.java
@@ -0,0 +1,435 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.namefind;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * This is the test class for {@link BilouNameFinderSequenceValidator}.
+ * The inputSequence is not actually used by the validator, but is provided in the tests to describe the cases.
+ */
+public class BilouNameFinderSequenceValidatorTest {
+
+  private static BilouNameFinderSequenceValidator validator = new BilouNameFinderSequenceValidator();
+  private static String START_A = "TypeA-" + BilouCodec.START;
+  private static String CONTINUE_A = "TypeA-" + BilouCodec.CONTINUE;
+  private static String LAST_A = "TypeA-" + BilouCodec.LAST;
+  private static String UNIT_A = "TypeA-" + BilouCodec.UNIT;
+
+
+  private static String START_B = "TypeB-" + BilouCodec.START;
+  private static String CONTINUE_B = "TypeB-" + BilouCodec.CONTINUE;
+  private static String LAST_B = "TypeB-" + BilouCodec.LAST;
+
+  //private static String UNIT = BilouCodec.UNIT;
+  private static String OTHER = BilouCodec.OTHER;
+
+  @Test
+  public void testStartAsFirstLabel() {
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
+    String[] outcomesSequence = new String[] { };
+    Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testContinueAsFirstLabel() {
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] { };
+    Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testLastAsFirstLabel() {
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] { };
+    Assert.assertFalse(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testUnitAsFirstLabel() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] { };
+    Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+  }
+
+  @Test
+  public void testOtherAsFirstLabel() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"something", "TypeA", "something"};
+    String[] outcomesSequence = new String[] { };
+    Assert.assertTrue(validator.validSequence(0, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Start, Any Start => Invalid
+   */
+  @Test
+  public void testBeginFollowedByBegin() {
+
+    String[] outcomesSequence = new String[] {START_A};
+
+    // Same Types
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = START_B;
+    inputSequence = new String[] {"TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Start, Continue, Same type => Valid
+   * Start, Continue, Diff. Type => Invalid
+   */
+  @Test
+  public void testBeginFollowedByContinue() {
+
+    String[] outcomesSequence = new String[] {START_A};
+
+    // Same Types
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // Different Types
+    outcome = CONTINUE_B;
+    inputSequence = new String[] {"TypeA", "TypeB", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Start, Last, Same Type => Valid
+   * Start, Last, Diff. Type => Invalid
+   */
+  @Test
+  public void testStartFollowedByLast() {
+
+    String[] outcomesSequence = new String[] {START_A};
+
+    // Same Type
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = LAST_B;
+    inputSequence = new String[] {"TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Start, Other => Invalid
+   */
+  @Test
+  public void testStartFollowedByOther() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] {START_A};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   *    Start, Unit => Invalid
+   */
+  @Test
+  public void testStartFollowedByUnit() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"TypeA", "AnyType", "something"};
+    String[] outcomesSequence = new String[] {START_A};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Continue, Any Begin => Invalid
+   */
+  @Test
+  public void testContinueFollowedByStart() {
+
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+
+    // Same Types
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = START_B;
+    inputSequence = new String[] {"TypeA", "TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+
+  }
+
+  /**
+   * Continue, Continue, Same type => Valid
+   * Continue, Continue, Diff. Type => Invalid
+   */
+  @Test
+  public void testContinueFollowedByContinue() {
+
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A, CONTINUE_A};
+
+    // Same Types
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+
+    // Different Types
+    outcome = CONTINUE_B;
+    inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Continue, Last, Same Type => Valid
+   * Continue, Last, Diff. Type => Invalid
+   */
+  @Test
+  public void testContinueFollowedByLast() {
+
+    String[] outcomesSequence = new String[] {OTHER, START_A, CONTINUE_A};
+
+    // Same Types
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"something", "TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+
+    // Different Types
+    outcome = LAST_B;
+    inputSequence = new String[] {"something", "TypeA", "TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Continue, Other => Invalid
+   */
+  @Test
+  public void testContinueFollowedByOther() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Continue, Unit => Invalid
+   */
+  @Test
+  public void testContinueFollowedByUnit() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "AnyType", "something"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A};
+    Assert.assertFalse(validator.validSequence(2, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Last, Any Start => Valid
+   */
+  @Test
+  public void testLastFollowedByStart() {
+
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
+
+    // Same Types
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "TypeA"};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = START_B;
+    inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "TypeB"};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   *    Last, Any Continue => Invalid
+   */
+  @Test
+  public void testLastFollowedByContinue() {
+
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
+
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = CONTINUE_B;
+    inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   *    Last, Any Last => Invalid
+   */
+  @Test
+  public void testLastFollowedByLast() {
+
+    String[] outcomesSequence = new String[] {OTHER, OTHER, START_A, CONTINUE_A, LAST_A};
+
+    // Same Types
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"something", "something", "TypeA",
+        "TypeA", "TypeA", "TypeA", "something"};
+    Assert.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
+
+    // Diff. Types
+    outcome = LAST_B;
+    inputSequence = new String[] {"something", "something", "TypeA", "TypeA",
+        "TypeA", "TypeB", "something"};
+    Assert.assertFalse(validator.validSequence(5, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Last, Other => Valid
+   */
+  @Test
+  public void testLastFollowedByOther() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Last, Unit => Valid
+   */
+  @Test
+  public void testLastFollowedByUnit() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "TypeA", "AnyType", "something"};
+    String[] outcomesSequence = new String[] {START_A, CONTINUE_A, LAST_A};
+    Assert.assertTrue(validator.validSequence(3, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Other, Any Start => Valid
+   */
+  @Test
+  public void testOtherFollowedByBegin() {
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Other, Any Continue => Invalid
+   */
+  @Test
+  public void testOtherFollowedByContinue() {
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Other, Any Last => Invalid
+   */
+  @Test
+  public void testOtherFollowedByLast() {
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"something", "TypeA", "TypeA"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Outside, Unit => Valid
+   */
+  @Test
+  public void testOtherFollowedByUnit() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"something", "AnyType", "something"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Other, Other => Valid
+   */
+  @Test
+  public void testOutsideFollowedByOutside() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"something", "something", "something"};
+    String[] outcomesSequence = new String[] {OTHER};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Unit, Any Start => Valid
+   */
+  @Test
+  public void testUnitFollowedByBegin() {
+    String outcome = START_A;
+    String[] inputSequence = new String[] {"AnyType", "TypeA", "something"};
+    String[] outcomesSequence = new String[] {UNIT_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Unit, Any Continue => Invalid
+   */
+  @Test
+  public void testUnitFollowedByInside() {
+    String outcome = CONTINUE_A;
+    String[] inputSequence = new String[] {"TypeA", "TypeA", "something"};
+    String[] outcomesSequence = new String[] {UNIT_A};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Unit, Any Last => Invalid
+   */
+  @Test
+  public void testUnitFollowedByLast() {
+    String outcome = LAST_A;
+    String[] inputSequence = new String[] {"AnyType", "TypeA", "something"};
+    String[] outcomesSequence = new String[] {UNIT_A};
+    Assert.assertFalse(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Unit, Other => Valid
+   */
+  @Test
+  public void testUnitFollowedByOutside() {
+    String outcome = OTHER;
+    String[] inputSequence = new String[] {"TypeA", "something", "something"};
+    String[] outcomesSequence = new String[] {UNIT_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+  /**
+   * Unit, Unit => Valid
+   */
+  @Test
+  public void testUnitFollowedByUnit() {
+    String outcome = UNIT_A;
+    String[] inputSequence = new String[] {"AnyType", "AnyType", "something"};
+    String[] outcomesSequence = new String[] {UNIT_A};
+    Assert.assertTrue(validator.validSequence(1, inputSequence, outcomesSequence, outcome));
+  }
+
+}


[09/50] [abbrv] opennlp git commit: OPENNLP-984: Remove type parameter from POS Tagger Trainer cli

Posted by jo...@apache.org.
OPENNLP-984: Remove type parameter from POS Tagger Trainer cli


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/672f1b09
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/672f1b09
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/672f1b09

Branch: refs/heads/parser_regression
Commit: 672f1b09dfcb1c854f930b6f1c7911e41141e1fa
Parents: cd23b58
Author: Jörn Kottmann <jo...@apache.org>
Authored: Sat Feb 11 00:20:45 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 .../cmdline/postag/POSTaggerTrainerTool.java    | 25 --------------------
 .../tools/cmdline/postag/TrainingParams.java    |  6 -----
 2 files changed, 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
index 4a78602..1e6fb54 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java
@@ -34,8 +34,6 @@ import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.POSTaggerME;
 import opennlp.tools.postag.TagDictionary;
 import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelType;
 import opennlp.tools.util.model.ModelUtil;
 
 public final class POSTaggerTrainerTool
@@ -63,7 +61,6 @@ public final class POSTaggerTrainerTool
 
     if (mlParams == null) {
       mlParams = ModelUtil.createDefaultTrainingParameters();
-      mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString());
     }
 
     File modelOutFile = params.getModel();
@@ -142,26 +139,4 @@ public final class POSTaggerTrainerTool
 
     CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
   }
-
-  static ModelType getModelType(String modelString) {
-    ModelType model;
-    if (modelString == null)
-      modelString = "maxent";
-
-    switch (modelString) {
-      case "maxent":
-        model = ModelType.MAXENT;
-        break;
-      case "perceptron":
-        model = ModelType.PERCEPTRON;
-        break;
-      case "perceptron_sequence":
-        model = ModelType.PERCEPTRON_SEQUENCE;
-        break;
-      default:
-        model = null;
-        break;
-    }
-    return model;
-  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/672f1b09/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
index 221dcbe..690b359 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java
@@ -29,12 +29,6 @@ import opennlp.tools.cmdline.params.BasicTrainingParams;
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {
-
-  @ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence",
-      description = "The type of the token name finder model. One of maxent|perceptron|perceptron_sequence.")
-  @OptionalParameter(defaultValue = "maxent")
-  String getType();
-
   @ParameterDescription(valueName = "dictionaryPath", description = "The XML tag dictionary file")
   @OptionalParameter
   File getDict();


[14/50] [abbrv] opennlp git commit: OPENNLP-980: Deprecate low-level feature constructors and methods

Posted by jo...@apache.org.
OPENNLP-980: Deprecate low-level feature constructors and methods


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ac787a4d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ac787a4d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ac787a4d

Branch: refs/heads/parser_regression
Commit: ac787a4dabf9823bf8b7e91f1b73d60d17e01006
Parents: b41fcd6
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed Feb 1 21:38:19 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 .../src/main/java/opennlp/tools/ml/maxent/GISModel.java     | 1 +
 .../java/opennlp/tools/ml/maxent/quasinewton/QNModel.java   | 1 +
 .../src/main/java/opennlp/tools/ml/model/AbstractModel.java | 9 +++++++++
 .../java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java   | 2 ++
 .../java/opennlp/tools/ml/perceptron/PerceptronModel.java   | 5 +++++
 5 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
index e546d1c..14c7fa3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java
@@ -152,6 +152,7 @@ public final class GISModel extends AbstractModel {
    *         string representation of the outcomes can be obtained from the
    *         method getOutcome(int i).
    */
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] prior,
       EvalParameters model) {
     Context[] params = model.getParams();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
index a35d54c..f02ee75 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
@@ -103,6 +103,7 @@ public class QNModel extends AbstractModel {
    *          Model parameters
    * @return Normalized probabilities for the outcomes given the context.
    */
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] probs,
       int nOutcomes, int nPredLabels, double[] parameters) {
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
index e5a60a7..eb80f1b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
@@ -39,6 +39,15 @@ public abstract class AbstractModel implements MaxentModel {
   /** The type of the model. */
   protected ModelType modelType;
 
+  /**
+   * @deprecated this will be removed in 1.8.1, pmap should be private
+   *
+   * @param params
+   * @param predLabels
+   * @param pmap
+   * @param outcomeNames
+   */
+  @Deprecated
   public AbstractModel(Context[] params, String[] predLabels,
       Map<String, Integer> pmap, String[] outcomeNames) {
     this.pmap = pmap;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
index ec3d9bd..0a28704 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
@@ -31,6 +31,7 @@ public class NaiveBayesModel extends AbstractModel {
   protected double[] outcomeTotals;
   protected long vocabulary;
 
+  @Deprecated
   public NaiveBayesModel(Context[] params, String[] predLabels, Map<String, Integer> pmap,
                          String[] outcomeNames) {
     super(params, predLabels, pmap, outcomeNames);
@@ -87,6 +88,7 @@ public class NaiveBayesModel extends AbstractModel {
     return eval(context, null, prior, model, true);
   }
 
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] prior,
                               EvalParameters model, boolean normalize) {
     Probabilities<Integer> probabilities = new LogProbabilities<>();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/ac787a4d/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java
index 146da08..d66b553 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java
@@ -25,6 +25,10 @@ import opennlp.tools.ml.model.EvalParameters;
 
 public class PerceptronModel extends AbstractModel {
 
+  /**
+   * @deprecated this will be removed in 1.8.1, pmap should be private
+   */
+  @Deprecated
   public PerceptronModel(Context[] params, String[] predLabels, Map<String, Integer> pmap,
                          String[] outcomeNames) {
     super(params,predLabels,pmap,outcomeNames);
@@ -62,6 +66,7 @@ public class PerceptronModel extends AbstractModel {
     return eval(context,null,prior,model,true);
   }
 
+  @Deprecated // visibility will be reduced in 1.8.1
   public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model,
                               boolean normalize) {
     Context[] params = model.getParams();


[41/50] [abbrv] opennlp git commit: OPENNLP-1004: Write a test case for the BilouCodec class

Posted by jo...@apache.org.
OPENNLP-1004: Write a test case for the BilouCodec class

This closes #142


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bc99b72f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bc99b72f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bc99b72f

Branch: refs/heads/parser_regression
Commit: bc99b72feaa8e3416feaa5df59c6198bfe7dbd8b
Parents: 5c9f6ab
Author: Peter Thygesen <pe...@gmail.com>
Authored: Wed Mar 15 18:54:26 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/namefind/BilouCodecTest.java  | 209 +++++++++++++++++++
 1 file changed, 209 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/bc99b72f/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
new file mode 100644
index 0000000..96d939f
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BilouCodecTest.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * This is the test class for {@link BilouCodec}.
+ */
+public class BilouCodecTest {
+
+  private static final BilouCodec codec = new BilouCodec();
+
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BilouCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BilouCodec.CONTINUE;
+  private static final String A_LAST = A_TYPE + "-" + BilouCodec.LAST;
+  private static final String A_UNIT = A_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BilouCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BilouCodec.CONTINUE;
+  private static final String B_LAST = B_TYPE + "-" + BilouCodec.LAST;
+  private static final String B_UNIT = B_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_UNIT = C_TYPE + "-" + BilouCodec.UNIT;
+
+  private static final String OTHER = BilouCodec.OTHER;
+
+  @Test
+  public void testEncodeNoNames() {
+    NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
+    String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeSingleUnitTokenSpan() {
+    String[] sentence = "I called Julie again.".split(" ");
+    Span[] singleSpan = new Span[] { new Span(2,3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, singleSpan, true);
+    String[] expected = new String[] {OTHER, OTHER, A_UNIT, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Julie' should be 'unit' only, the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] singleSpan = new Span[] { new Span(2,4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, singleSpan, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, A_LAST, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 'last' " +
+        "and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeTripleTokenSpan() {
+    String[] sentence = "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
+    Span[] singleSpan = new Span[] { new Span(3,6, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, singleSpan, true);
+    String[] expected = new String[] {OTHER, OTHER, OTHER, A_START, A_CONTINUE,
+        A_LAST, OTHER, OTHER, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Anders' should be 'start' only, 'Fogh' is 'inside', " +
+        "'Rasmussen' is 'last' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeAdjacentUnitSpans() {
+    String[] sentence = "word PersonA PersonB word".split(" ");
+    Span[] singleSpan = new Span[] { new Span(1,2, A_TYPE), new Span(2, 3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, singleSpan, true);
+    String[] expected = new String[] {OTHER, A_UNIT, A_UNIT, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expected, actual);
+  }
+
+  @Test
+  public void testCreateSequenceValidator() {
+    Assert.assertTrue(codec.createSequenceValidator() instanceof BilouNameFinderSequenceValidator);
+  }
+
+  @Test
+  public void testDecodeEmpty() {
+    Span[] expected = new Span[] {};
+    Span[] actual = codec.decode(new ArrayList<String>());
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Unit, Other
+   */
+  @Test
+  public void testDecodeSingletonFirst() {
+
+    List<String> encoded = Arrays.asList(A_UNIT, OTHER);
+    Span[] expected = new Span[] {new Span(0,1, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Unit, Unit, Other
+   */
+  @Test
+  public void testDecodeAdjacentSingletonFirst() {
+    List<String> encoded = Arrays.asList(A_UNIT, A_UNIT, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, A_TYPE), new Span(1, 2, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Last, Other
+   */
+  @Test
+  public void testDecodePairFirst() {
+    List<String> encoded = Arrays.asList(A_START, A_LAST, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Continue, Last, Other
+   */
+  @Test
+  public void testDecodeTripletFirst() {
+    List<String> encoded = Arrays.asList(A_START, A_CONTINUE, A_LAST, OTHER);
+    Span[] expected = new Span[] {new Span(0, 3, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Continue, Continue, Last, Other
+   */
+  @Test
+  public void testDecodeTripletContinuationFirst() {
+    List<String> encoded = Arrays.asList(A_START, A_CONTINUE, A_CONTINUE,
+        A_LAST, OTHER);
+    Span[] expected = new Span[] {new Span(0, 4, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Last, Unit, Other
+   */
+  @Test
+  public void testDecodeAdjacentPairSingleton() {
+    List<String> encoded = Arrays.asList(A_START, A_LAST, A_UNIT, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, A_TYPE),
+        new Span(2, 3, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, Unit, Other
+   */
+  @Test
+  public void testDecodeOtherFirst() {
+    List<String> encoded = Arrays.asList(OTHER, A_UNIT, OTHER);
+    Span[] expected = new Span[] {new Span(1, 2, A_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, A-Start, A-Continue, A-Last, Other, B-Start, B-Last, Other, C-Unit, Other
+   */
+  @Test
+  public void testDecodeMultiClass() {
+    List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_LAST, OTHER,
+        B_START, B_LAST, OTHER, C_UNIT, OTHER);
+    Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
+        new Span(5, 7, B_TYPE), new Span(8,9, C_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+}
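
For reference, the encode/decode round trip exercised by these tests looks as follows from caller code.
The sentence, the span offsets and the "person" type are made-up examples, not taken from the commit:

  BilouCodec codec = new BilouCodec();

  String[] tokens = "I saw Stefanie Schmidt today .".split(" ");
  Span[] names = new Span[] { new Span(2, 4, "person") };  // covers "Stefanie Schmidt"

  // encode: one outcome string per token; multi-token names receive the start/continue/last
  // suffixes, single-token names the unit suffix, and every other token the OTHER outcome
  String[] outcomes = codec.encode(names, tokens.length);

  // decode: turn an outcome sequence (e.g. a tagger's predictions) back into typed spans
  Span[] decoded = codec.decode(Arrays.asList(outcomes));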


[20/50] [abbrv] opennlp git commit: OPENNLP-986 - Stupid Backoff as default LM discounting

Posted by jo...@apache.org.
OPENNLP-986 - Stupid Backoff as default LM discounting


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/41f153aa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/41f153aa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/41f153aa

Branch: refs/heads/parser_regression
Commit: 41f153aa946641afb076b86c243a7b93667778f6
Parents: 73cf560
Author: Tommaso Teofili <te...@adobe.com>
Authored: Tue Feb 14 14:49:09 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .../tools/languagemodel/NGramLanguageModel.java | 74 +++++---------------
 .../java/opennlp/tools/ngram/NGramUtils.java    |  3 +-
 .../LanguageModelEvaluationTest.java            |  2 +-
 .../languagemodel/NgramLanguageModelTest.java   | 15 ++--
 4 files changed, 28 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
index e11c107..501c1bc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
@@ -26,52 +26,30 @@ import opennlp.tools.util.StringList;
 
 /**
  * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link opennlp.tools.ngram.NGramModel}
- * using Laplace smoothing probability estimation to get the probabilities of the ngrams.
- * See also {@link NGramUtils#calculateLaplaceSmoothingProbability(
- *opennlp.tools.util.StringList, Iterable, int, Double)}.
+ * using Stupid Backoff to get the probabilities of the ngrams.
  */
 public class NGramLanguageModel extends NGramModel implements LanguageModel {
 
   private static final int DEFAULT_N = 3;
-  private static final double DEFAULT_K = 1d;
 
   private final int n;
-  private final double k;
 
   public NGramLanguageModel() {
-    this(DEFAULT_N, DEFAULT_K);
+    this(DEFAULT_N);
   }
 
   public NGramLanguageModel(int n) {
-    this(n, DEFAULT_K);
-  }
-
-  public NGramLanguageModel(double k) {
-    this(DEFAULT_N, k);
-  }
-
-  public NGramLanguageModel(int n, double k) {
     this.n = n;
-    this.k = k;
   }
 
   public NGramLanguageModel(InputStream in) throws IOException {
-    this(in, DEFAULT_N, DEFAULT_K);
-  }
-
-  public NGramLanguageModel(InputStream in, double k) throws IOException {
-    this(in, DEFAULT_N, k);
-  }
-
-  public NGramLanguageModel(InputStream in, int n) throws IOException {
-    this(in, n, DEFAULT_K);
+    this(in, DEFAULT_N);
   }
 
-  public NGramLanguageModel(InputStream in, int n, double k)
+  public NGramLanguageModel(InputStream in, int n)
       throws IOException {
     super(in);
     this.n = n;
-    this.k = k;
   }
 
   @Override
@@ -79,24 +57,13 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
     double probability = 0d;
     if (size() > 0) {
       for (StringList ngram : NGramUtils.getNGrams(sample, n)) {
-        StringList nMinusOneToken = NGramUtils
-            .getNMinusOneTokenFirst(ngram);
-        if (size() > 1000000) {
-          // use stupid backoff
-          probability += Math.log(
-              getStupidBackoffProbability(ngram, nMinusOneToken));
-        } else {
-          // use laplace smoothing
-          probability += Math.log(
-              getLaplaceSmoothingProbability(ngram, nMinusOneToken));
+        double score = stupidBackoff(ngram);
+        probability += Math.log(score);
+        if (Double.isNaN(probability)) {
+          probability = 0d;
         }
       }
-      if (Double.isNaN(probability)) {
-        probability = 0d;
-      } else if (probability != 0) {
-        probability = Math.exp(probability);
-      }
-
+      probability = Math.exp(probability);
     }
     return probability;
   }
@@ -125,24 +92,21 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
     return token;
   }
 
-  private double getLaplaceSmoothingProbability(StringList ngram,
-                                                StringList nMinusOneToken) {
-    return (getCount(ngram) + k) / (getCount(nMinusOneToken) + k * size());
-  }
-
-  private double getStupidBackoffProbability(StringList ngram,
-                                             StringList nMinusOneToken) {
+  private double stupidBackoff(StringList ngram) {
     int count = getCount(ngram);
+    StringList nMinusOneToken = NGramUtils.getNMinusOneTokenFirst(ngram);
     if (nMinusOneToken == null || nMinusOneToken.size() == 0) {
-      return count / size();
+      return (double) count / (double) size();
     } else if (count > 0) {
-      return ((double) count) / ((double) getCount(
-          nMinusOneToken)); // maximum likelihood probability
+      double countM1 = getCount(nMinusOneToken);
+      if (countM1 == 0d) {
+        countM1 = size(); // avoid an infinite result when the (n-1)-gram was never seen
+      }
+      return (double) count / countM1;
     } else {
-      StringList nextNgram = NGramUtils.getNMinusOneTokenLast(ngram);
-      return 0.4d * getStupidBackoffProbability(nextNgram,
-          NGramUtils.getNMinusOneTokenFirst(nextNgram));
+      return 0.4 * stupidBackoff(NGramUtils.getNMinusOneTokenLast(ngram));
     }
+
   }
 
 }
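
The rewritten private method is plain Stupid Backoff: an observed ngram is scored by its relative
frequency count(ngram) / count(n-1 gram prefix), an unseen ngram backs off to 0.4 times the score of the
next shorter ngram, and the per-ngram scores are summed in log space and exponentiated at the end.
Mirroring the test changes further down, client code uses the model like this (the sentences are
illustrative only):

  NGramLanguageModel lm = new NGramLanguageModel(3);
  lm.add(new StringList("I", "see", "the", "fox"), 1, 3);   // index all 1- to 3-grams
  lm.add(new StringList("the", "red", "house"), 1, 3);

  // probability of a sentence under the Stupid Backoff scores
  double p = lm.calculateProbability(new StringList("I", "see", "the", "red", "house"));

  // the same ngram counts drive next-token prediction
  StringList next = lm.predictNextTokens(new StringList("I", "see"));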

http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
index 0132c92..e41291f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java
@@ -34,13 +34,12 @@ public class NGramUtils {
    *
    * @param ngram the ngram to get the probability for
    * @param set   the vocabulary
-   * @param size  the size of the vocabulary
    * @param k     the smoothing factor
    * @return the Laplace smoothing probability
    * @see <a href="https://en.wikipedia.org/wiki/Additive_smoothing">Additive Smoothing</a>
    */
   public static double calculateLaplaceSmoothingProbability(StringList ngram,
-      Iterable<StringList> set, int size, Double k) {
+      Iterable<StringList> set, Double k) {
     return (count(ngram, set) + k) / (count(getNMinusOneTokenFirst(ngram), set) + k * 1);
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
index b6c3f01..d4e8e37 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java
@@ -54,7 +54,7 @@ public class LanguageModelEvaluationTest {
 
     NGramLanguageModel trigramLM = new NGramLanguageModel(3);
     for (StringList sentence : trainingVocabulary) {
-      trigramLM.add(sentence, 2, 3);
+      trigramLM.add(sentence, 1, 3);
     }
     double trigramPerplexity =
         LanguageModelTestUtils.getPerplexity(trigramLM, testVocabulary, 3);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/41f153aa/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
index 7ffbf27..2ac1f5e 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java
@@ -22,7 +22,6 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.commons.io.IOUtils;
-
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -47,7 +46,7 @@ public class NgramLanguageModelTest {
   public void testRandomVocabularyAndSentence() throws Exception {
     NGramLanguageModel model = new NGramLanguageModel();
     for (StringList sentence : LanguageModelTestUtils.generateRandomVocabulary(10)) {
-      model.add(sentence, 2, 3);
+      model.add(sentence, 1, 3);
     }
     double probability = model.calculateProbability(LanguageModelTestUtils.generateRandomSentence());
     Assert.assertTrue("a probability measure should be between 0 and 1 [was "
@@ -71,7 +70,7 @@ public class NgramLanguageModelTest {
 
   @Test
   public void testBigramProbabilityNoSmoothing() throws Exception {
-    NGramLanguageModel model = new NGramLanguageModel(2, 0);
+    NGramLanguageModel model = new NGramLanguageModel(2);
     model.add(new StringList("<s>", "I", "am", "Sam", "</s>"), 1, 2);
     model.add(new StringList("<s>", "Sam", "I", "am", "</s>"), 1, 2);
     model.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"), 1, 2);
@@ -94,16 +93,16 @@ public class NgramLanguageModelTest {
   @Test
   public void testTrigram() throws Exception {
     NGramLanguageModel model = new NGramLanguageModel(3);
-    model.add(new StringList("I", "see", "the", "fox"), 2, 3);
-    model.add(new StringList("the", "red", "house"), 2, 3);
-    model.add(new StringList("I", "saw", "something", "nice"), 2, 3);
+    model.add(new StringList("I", "see", "the", "fox"), 1, 3);
+    model.add(new StringList("the", "red", "house"), 1, 3);
+    model.add(new StringList("I", "saw", "something", "nice"), 1, 3);
     double probability = model.calculateProbability(new StringList("I", "saw", "the", "red", "house"));
     Assert.assertTrue("a probability measure should be between 0 and 1 [was "
         + probability + "]", probability >= 0 && probability <= 1);
 
     StringList tokens = model.predictNextTokens(new StringList("I", "saw"));
     Assert.assertNotNull(tokens);
-    Assert.assertEquals(new StringList("something", "nice"), tokens);
+    Assert.assertEquals(new StringList("something"), tokens);
   }
 
   @Test
@@ -128,7 +127,7 @@ public class NgramLanguageModelTest {
     double probability = languageModel.calculateProbability(new StringList("The", "brown", "fox", "jumped"));
     Assert.assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]",
         probability >= 0 && probability <= 1);
-    StringList tokens = languageModel.predictNextTokens(new StringList("fox"));
+    StringList tokens = languageModel.predictNextTokens(new StringList("the","brown","fox"));
     Assert.assertNotNull(tokens);
     Assert.assertEquals(new StringList("jumped"), tokens);
   }


[21/50] [abbrv] opennlp git commit: NoJira: Add checkstyle for new lines and fix existing files

Posted by jo...@apache.org.
NoJira: Add checkstyle for new lines and fix existing files

This closes #123


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/73cf5600
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/73cf5600
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/73cf5600

Branch: refs/heads/parser_regression
Commit: 73cf5600206d225f46c17797793f2e2b77e7d422
Parents: 91352d5
Author: jzonthemtn <je...@mtnfog.com>
Authored: Mon Feb 13 15:20:30 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 checkstyle.xml                                                    | 2 ++
 .../java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java     | 3 ++-
 .../src/main/java/opennlp/tools/chunker/package-info.java         | 2 +-
 .../main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java    | 2 +-
 .../src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java     | 2 +-
 .../src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java  | 2 +-
 .../java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java  | 2 +-
 .../java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java   | 2 +-
 .../main/java/opennlp/tools/cmdline/lemmatizer/package-info.java  | 2 +-
 .../main/java/opennlp/tools/cmdline/params/BasicFormatParams.java | 2 +-
 .../tools/cmdline/sentdetect/SentenceDetectorConverterTool.java   | 2 +-
 .../opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java   | 2 +-
 .../src/main/java/opennlp/tools/dictionary/package-info.java      | 2 +-
 .../src/main/java/opennlp/tools/doccat/package-info.java          | 2 +-
 .../java/opennlp/tools/formats/AbstractSampleStreamFactory.java   | 2 +-
 .../java/opennlp/tools/formats/ChunkerSampleStreamFactory.java    | 2 +-
 .../opennlp/tools/formats/DetokenizerSampleStreamFactory.java     | 2 +-
 .../java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java | 2 +-
 .../main/java/opennlp/tools/formats/ParseSampleStreamFactory.java | 2 +-
 .../java/opennlp/tools/formats/SentenceSampleStreamFactory.java   | 2 +-
 .../main/java/opennlp/tools/formats/TokenSampleStreamFactory.java | 2 +-
 .../tools/formats/ontonotes/OntoNotesNameSampleStream.java        | 2 +-
 .../src/main/java/opennlp/tools/formats/package-info.java         | 2 +-
 .../src/main/java/opennlp/tools/languagemodel/package-info.java   | 2 +-
 .../src/main/java/opennlp/tools/lemmatizer/package-info.java      | 2 +-
 .../java/opennlp/tools/namefind/NameFinderSequenceValidator.java  | 2 +-
 .../src/main/java/opennlp/tools/namefind/package-info.java        | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/package-info.java       | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java   | 2 +-
 .../java/opennlp/tools/parser/ParserChunkerSequenceValidator.java | 2 +-
 .../src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java   | 2 +-
 .../src/main/java/opennlp/tools/parser/chunking/package-info.java | 2 +-
 .../src/main/java/opennlp/tools/parser/package-info.java          | 2 +-
 .../java/opennlp/tools/parser/treeinsert/ParserEventStream.java   | 2 +-
 .../main/java/opennlp/tools/parser/treeinsert/package-info.java   | 2 +-
 .../src/main/java/opennlp/tools/postag/package-info.java          | 2 +-
 .../src/main/java/opennlp/tools/sentdetect/package-info.java      | 2 +-
 .../src/main/java/opennlp/tools/tokenize/package-info.java        | 2 +-
 .../src/main/java/opennlp/tools/util/CollectionObjectStream.java  | 2 +-
 .../src/main/java/opennlp/tools/util/InputStreamFactory.java      | 2 +-
 .../src/main/java/opennlp/tools/util/ext/package-info.java        | 2 +-
 .../opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java | 2 +-
 .../src/main/java/opennlp/tools/util/featuregen/package-info.java | 2 +-
 .../main/java/opennlp/tools/util/model/DictionarySerializer.java  | 2 +-
 .../main/java/opennlp/tools/util/model/PropertiesSerializer.java  | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/util/package-info.java  | 2 +-
 .../src/test/java/opennlp/tools/dictionary/DictionaryTest.java    | 2 +-
 .../java/opennlp/tools/formats/Conll03NameSampleStreamTest.java   | 2 +-
 .../test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java  | 2 +-
 .../opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java  | 2 +-
 .../opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java | 2 +-
 .../java/opennlp/tools/namefind/DictionaryNameFinderTest.java     | 2 +-
 .../src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java | 2 +-
 .../src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java | 2 +-
 .../src/test/java/opennlp/tools/postag/POSModelTest.java          | 2 +-
 .../src/test/java/opennlp/tools/postag/POSSampleTest.java         | 2 +-
 .../src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java | 2 +-
 .../src/test/java/opennlp/tools/tokenize/TokenizerMETest.java     | 2 +-
 .../src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java  | 2 +-
 .../test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java | 2 +-
 .../src/test/java/opennlp/tools/util/AbstractEventStreamTest.java | 2 +-
 .../src/test/java/opennlp/tools/util/StringListTest.java          | 2 +-
 opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java   | 2 +-
 .../src/test/java/opennlp/tools/util/eval/FMeasureTest.java       | 2 +-
 .../opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java | 2 +-
 .../java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java  | 2 +-
 .../opennlp/tools/util/featuregen/IdentityFeatureGenerator.java   | 2 +-
 .../opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java | 2 +-
 .../src/main/java/opennlp/uima/doccat/DocumentCategorizer.java    | 2 +-
 .../src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java | 2 +-
 opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java  | 2 +-
 .../src/main/java/opennlp/uima/normalizer/Normalizer.java         | 2 +-
 .../src/main/java/opennlp/uima/normalizer/NumberUtil.java         | 2 +-
 .../src/main/java/opennlp/uima/normalizer/StringDictionary.java   | 2 +-
 .../src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java      | 2 +-
 opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java   | 2 +-
 .../src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java  | 2 +-
 .../src/main/java/opennlp/uima/util/AnnotationComparator.java     | 2 +-
 .../src/main/java/opennlp/uima/util/ContainingConstraint.java     | 2 +-
 .../src/main/java/opennlp/uima/util/ExceptionMessages.java        | 2 +-
 .../java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java  | 2 +-
 82 files changed, 84 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/checkstyle.xml
----------------------------------------------------------------------
diff --git a/checkstyle.xml b/checkstyle.xml
index 64da8c8..ab65feb 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -42,6 +42,8 @@
     <property name="eachLine" value="true"/>
   </module>
 
+  <module name="NewlineAtEndOfFile" />
+
   <module name="RegexpMultiline">
     <property name="format" value="\r\n"/>
     <property name="message" value="CRLF line endings are prohibited"/>

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
index 351ac93..9ce22d2 100644
--- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
@@ -93,4 +93,5 @@ public class POSTaggerFactoryTest {
     Assert.assertEquals(2, factory.getTagDictionary().getTags("casa").length);
   }
 
-}
\ No newline at end of file
+}
+

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java
index 1a789bf..c76a55e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to finding non-recursive syntactic annotation such as noun phrase chunks.
  */
-package opennlp.tools.chunker;
\ No newline at end of file
+package opennlp.tools.chunker;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java
index 3a81303..46301ce 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractEvaluatorTool.java
@@ -49,4 +49,4 @@ public class AbstractEvaluatorTool<T, P> extends AbstractTypedParamTool<T, P> {
     validateFactoryArgs(factory, fargs);
     sampleStream = factory.create(fargs);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
index 270a429..abe73b4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/BasicCmdLineTool.java
@@ -30,4 +30,4 @@ public abstract class BasicCmdLineTool extends CmdLineTool {
    * @param args arguments
    */
   public abstract void run(String args[]);
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
index e00f4bf..8bc6b95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ObjectStreamFactory.java
@@ -37,4 +37,4 @@ public interface ObjectStreamFactory<T> {
    * @return ObjectStream instance
    */
   ObjectStream<T> create(String args[]);
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java
index 43463c4..60e72ed 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerConverterTool.java
@@ -29,4 +29,4 @@ public class ChunkerConverterTool extends AbstractConverterTool<ChunkSample> {
   public ChunkerConverterTool() {
     super(ChunkSample.class);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
index 13f28b2..e4e47b5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java
@@ -89,4 +89,4 @@ public class LemmatizerMETool extends BasicCmdLineTool {
       perfMon.stopAndPrintFinalResult();
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java
index 76db793..9f1b84a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Classes giving access to the opennlp.tools.lemmatizer functionalities.
  */
-package opennlp.tools.cmdline.lemmatizer;
\ No newline at end of file
+package opennlp.tools.cmdline.lemmatizer;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
index 959d2db..292612f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java
@@ -28,4 +28,4 @@ public interface BasicFormatParams extends EncodingParameter {
 
   @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
   File getData();
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorConverterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorConverterTool.java
index 72de06e..de85d0c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorConverterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorConverterTool.java
@@ -25,4 +25,4 @@ public class SentenceDetectorConverterTool extends AbstractConverterTool<Sentenc
   public SentenceDetectorConverterTool() {
     super(SentenceSample.class);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java
index 609b874..808fa59 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerConverterTool.java
@@ -25,4 +25,4 @@ public class TokenizerConverterTool extends AbstractConverterTool<TokenSample> {
   public TokenizerConverterTool() {
     super(TokenSample.class);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java
index d21fa86..c6a368b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to parsing and storing dictionaries.
  */
-package opennlp.tools.dictionary;
\ No newline at end of file
+package opennlp.tools.dictionary;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java
index eb5aa42..9a0795e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package for classifying a document into a category.
  */
-package opennlp.tools.doccat;
\ No newline at end of file
+package opennlp.tools.doccat;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
index e229666..6a7690e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java
@@ -41,4 +41,4 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact
   public <P> Class<P> getParameters() {
     return params;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
index d64c1ea..a95fe3d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
@@ -61,4 +61,4 @@ public class ChunkerSampleStreamFactory extends AbstractSampleStreamFactory<Chun
 
     return new ChunkSampleStream(lineStream);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
index 4230524..cb68348 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
@@ -44,4 +44,4 @@ public abstract class DetokenizerSampleStreamFactory<T> extends AbstractSampleSt
       throw new TerminateToolException(-1, "IO error while loading detokenizer dict: " + e.getMessage(), e);
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java
index edd9a2d..3635baf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java
@@ -61,4 +61,4 @@ public class LemmatizerSampleStreamFactory extends AbstractSampleStreamFactory<L
 
     return new LemmaSampleStream(lineStream);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
index 78c7812..765b9eb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
@@ -61,4 +61,4 @@ public class ParseSampleStreamFactory extends AbstractSampleStreamFactory<Parse>
 
     return new ParseSampleStream(lineStream);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
index 933fe6b..2292ed8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
@@ -62,4 +62,4 @@ public class SentenceSampleStreamFactory extends AbstractSampleStreamFactory<Sen
 
     return new SentenceSampleStream(lineStream);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
index d915ed8..a0a1315 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
@@ -61,4 +61,4 @@ public class TokenSampleStreamFactory extends LanguageSampleStreamFactory<TokenS
 
     return new TokenSampleStream(lineStream);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
index 744e134..41e5aa9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
@@ -166,4 +166,4 @@ public class OntoNotesNameSampleStream extends
       return null;
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java
index 3913977..7042203 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Experimental package related to converting various corpora to OpenNLP Format.
  */
-package opennlp.tools.formats;
\ No newline at end of file
+package opennlp.tools.formats;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java
index e73e020..e64bbee 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to language models
  */
-package opennlp.tools.languagemodel;
\ No newline at end of file
+package opennlp.tools.languagemodel;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
index ef79ae8..74d2148 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related with the lemmatizer tool
  */
-package opennlp.tools.lemmatizer;
\ No newline at end of file
+package opennlp.tools.lemmatizer;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
index cccf1ac..d42e8c5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java
@@ -51,4 +51,4 @@ public class NameFinderSequenceValidator implements
     }
     return true;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java
index 1bb338f..01e55ae 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to finding proper names and numeric amounts.
  */
-package opennlp.tools.namefind;
\ No newline at end of file
+package opennlp.tools.namefind;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java
index cce3053..2e6558f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to computing and storing n-gram frequencies.
  */
-package opennlp.tools.ngram;
\ No newline at end of file
+package opennlp.tools.ngram;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/package-info.java
index 6c20792..7204600 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Contains packages which solve common NLP tasks.
  */
-package opennlp.tools;
\ No newline at end of file
+package opennlp.tools;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
index 204bd9a..bf4c069 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
@@ -40,4 +40,4 @@ public interface HeadRules {
    * @return the set of punctuation tags.
    */
   Set<String> getPunctuationTags();
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
index 0787b85..6b748a6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
@@ -66,4 +66,4 @@ public class ParserChunkerSequenceValidator implements SequenceValidator<String>
     }
     return true;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
index c144a4b..b67c1e7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
@@ -32,4 +32,4 @@ public enum ParserEventTypeEnum {
   TAG,
 
   ATTACH
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/package-info.java
index baaccad..bacf55c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package containing code for performing full syntactic parsing using shift/reduce-style decisions.
  */
-package opennlp.tools.parser.chunking;
\ No newline at end of file
+package opennlp.tools.parser.chunking;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java
index 2136690..3b3de74 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package containing common code for performing full syntactic parsing.
  */
-package opennlp.tools.parser;
\ No newline at end of file
+package opennlp.tools.parser;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
index 0372015..5390501 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
@@ -352,4 +352,4 @@ public class ParserEventStream extends AbstractParserEventStream {
       builtNodes.clear();
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java
index 80c8819..2776888 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java
@@ -19,4 +19,4 @@
  * Package containing experimental code for performing full syntactic
  * parsing using attachment decisions.
  */
-package opennlp.tools.parser.treeinsert;
\ No newline at end of file
+package opennlp.tools.parser.treeinsert;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/postag/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/postag/package-info.java
index 3c3d3ce..598f91b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to part-of-speech tagging.
  */
-package opennlp.tools.postag;
\ No newline at end of file
+package opennlp.tools.postag;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java
index 8e5b0fe..d60d84f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package related to identifying sentece boundries.
  */
-package opennlp.tools.sentdetect;
\ No newline at end of file
+package opennlp.tools.sentdetect;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java
index bd764fa..4365060 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java
@@ -21,4 +21,4 @@
  * learnable <code>TokenizerME</code>, the <code>WhitespaceTokenizer</code> and
  * the <code>SimpleTokenizer</code> which is a character class tokenizer.
  */
-package opennlp.tools.tokenize;
\ No newline at end of file
+package opennlp.tools.tokenize;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java b/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java
index 700d1f3..a819f6f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java
@@ -40,4 +40,4 @@ public class CollectionObjectStream<E> implements ObjectStream<E> {
 
   public void close() {
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
index 4a57a49..1c872e6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
@@ -29,4 +29,4 @@ import java.io.InputStream;
 public interface InputStreamFactory {
 
   InputStream createInputStream() throws IOException;
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java
index 6de9b39..aea56d4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package containing extension loading code.
  */
-package opennlp.tools.util.ext;
\ No newline at end of file
+package opennlp.tools.util.ext;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
index cc55f1a..771a0b7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
@@ -36,4 +36,4 @@ public class BigramNameFeatureGenerator implements AdaptiveFeatureGenerator {
       features.add("wc,nc=" + wc + "," + nwc);
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java
index d81d813..d04f321 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java
@@ -18,4 +18,4 @@
 /**
  * This package contains classes for generating sequence features.
  */
-package opennlp.tools.util.featuregen;
\ No newline at end of file
+package opennlp.tools.util.featuregen;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java
index 092fa8e..a323122 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java
@@ -37,4 +37,4 @@ public class DictionarySerializer implements ArtifactSerializer<Dictionary> {
   static void register(Map<String, ArtifactSerializer> factories) {
     factories.put("dictionary", new DictionarySerializer());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java
index 5d03413..adef53c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java
@@ -39,4 +39,4 @@ class PropertiesSerializer implements ArtifactSerializer<Properties> {
   static void register(Map<String, ArtifactSerializer> factories) {
     factories.put("properties", new PropertiesSerializer());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/main/java/opennlp/tools/util/package-info.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/util/package-info.java
index a9e0830..2d4c65b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/package-info.java
@@ -18,4 +18,4 @@
 /**
  * Package containing utility data structures and algorithms used by multiple other packages.
  */
-package opennlp.tools.util;
\ No newline at end of file
+package opennlp.tools.util;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java b/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java
index 3e4b313..54e537f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java
@@ -230,4 +230,4 @@ public class DictionaryTest {
     Assert.assertTrue(!dict.contains(entry2));
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
index 177f891..42ce715 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java
@@ -106,4 +106,4 @@ public class Conll03NameSampleStreamTest {
 
     Assert.assertEquals(sample, sampleStream.read());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
index 4c93c15..76a4813 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java
@@ -101,4 +101,4 @@ public class ScaleDoesntMatterTest {
       Assert.assertEquals(smallResults[i], largeResults[i], 0.01f);
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java
index e583840..d084977 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/io/RealValueFileEventStreamTest.java
@@ -56,4 +56,4 @@ public class RealValueFileEventStreamTest {
     }
     Assert.assertEquals(1, indexer.getOutcomeLabels().length);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
index b774a67..a76b428 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
@@ -76,4 +76,4 @@ public class NaiveBayesModelReadWriteTest {
     AbstractModel abstractModel = reader.constructModel();
     Assert.assertNotNull(abstractModel);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
index d149086..7599551 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java
@@ -117,4 +117,4 @@ public class DictionaryNameFinderTest {
     Assert.assertTrue(names.length == 1);
     Assert.assertTrue(names[0].length() == 2);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
index d50a34f..0b3fe2a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
@@ -96,4 +96,4 @@ public class RegexNameFinderTest {
 
     Assert.assertTrue(result.length == 0);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
index 7143dfb..e0ce2a6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java
@@ -125,4 +125,4 @@ public class DummyPOSTaggerFactory extends POSTaggerFactory {
 
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/postag/POSModelTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSModelTest.java
index 2b30e35..dffb2d4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSModelTest.java
@@ -62,4 +62,4 @@ public class POSModelTest {
 
     // TODO: add equals to pos model
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java
index e6a2846..de437cd 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java
@@ -111,4 +111,4 @@ public class POSSampleTest {
 
     Assert.fail();
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java
index 0e59c11..d51c905 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/SimpleTokenizerTest.java
@@ -89,4 +89,4 @@ public class SimpleTokenizerTest {
     Assert.assertTrue(")".equals(tokenizedText[4]));
     Assert.assertTrue(tokenizedText.length == 5);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
index f9b15f1..9acb2e8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java
@@ -65,4 +65,4 @@ public class TokenizerMETest {
     Assert.assertEquals("through", tokens[7]);
     Assert.assertEquals("!", tokens[8]);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
index 4a5aab5..ea24039 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
@@ -49,4 +49,4 @@ public class TokenizerModelTest {
 
     // TODO: check that both maxent models are equal
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java
index f54402f..c6eba8b 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/WhitespaceTokenizerTest.java
@@ -59,4 +59,4 @@ public class WhitespaceTokenizerTest {
     Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize(" ").length); // tab
     Assert.assertEquals(0, WhitespaceTokenizer.INSTANCE.tokenize("     ").length);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java
index 770d97a..0470fc2 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java
@@ -124,4 +124,4 @@ public class AbstractEventStreamTest {
     }
 
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
index 83b7503..a57a2ae 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringListTest.java
@@ -99,4 +99,4 @@ public class StringListTest {
   public void testToString() {
     new StringList("a", "b").toString();
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java
index 8c2292b..49b6d1f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/VersionTest.java
@@ -67,4 +67,4 @@ public class VersionTest {
 
     Assert.assertTrue(false);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
index 82087aa..b57b0ec 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java
@@ -153,4 +153,4 @@ public class FMeasureTest {
     Assert.assertEquals(fm.getRecallScore(), fmMerge.getRecallScore(), DELTA);
     Assert.assertEquals(fm.getPrecisionScore(), fmMerge.getPrecisionScore(), DELTA);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
index b7fa222..01b1d95 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java
@@ -137,4 +137,4 @@ public class CachedFeatureGeneratorTest {
     Assert.assertTrue(features.contains(expectedToken));
     Assert.assertEquals(1, features.size());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 9911a36..8a48575 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -124,4 +124,4 @@ public class GeneratorFactoryTest {
 
     Assert.assertTrue(mapping.get("test.resource") instanceof WordClusterDictionarySerializer);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
index 970fe63..a1562a8 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java
@@ -25,4 +25,4 @@ class IdentityFeatureGenerator implements AdaptiveFeatureGenerator {
       String[] previousOutcomes) {
     features.add(tokens[index]);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java
index 5c645cf..aff43c0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorTest.java
@@ -116,4 +116,4 @@ public class WindowFeatureGeneratorTest {
     Assert.assertTrue(features.contains(WindowFeatureGenerator.NEXT_PREFIX + "2" +
         testSentence[testTokenIndex + 2]));
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
index 5159680..0f83f85 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
@@ -68,4 +68,4 @@ public class DocumentCategorizer extends AbstractDocumentCategorizer {
 
     categoryAnnotation.setStringValue(mCategoryFeature, bestCategory);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
index a6289fe..5920de3 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
@@ -93,4 +93,4 @@ public class DictionaryNameFinder extends AbstractNameFinder {
   public void destroy() {
     mNameFinder = null;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
index f7be057..e89d2d6 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
@@ -203,4 +203,4 @@ public final class NameFinder extends AbstractNameFinder {
   public void destroy() {
     mNameFinder = null;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
index 5a6a578..032d230 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
@@ -236,4 +236,4 @@ public class Normalizer extends CasAnnotator_ImplBase {
       }
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
index a6c1c94..8e8920f 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
@@ -94,4 +94,4 @@ public final class NumberUtil {
 
     return numberFormat.parse(number);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
index d477dbf..d9239bd 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
@@ -95,4 +95,4 @@ public class StringDictionary {
 
     DictionaryEntryPersistor.serialize(out, entryIterator, true);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
index 9ec3733..6e353b6 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
@@ -57,4 +57,4 @@ public final class SimpleTokenizer extends AbstractTokenizer {
   protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
     return tokenizer.tokenizePos(sentence.getCoveredText());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
index dc97be3..9f72f92 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
@@ -139,4 +139,4 @@ public final class Tokenizer extends AbstractTokenizer {
     // dereference model to allow garbage collection
     tokenizer = null;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
index e671882..0151b75 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
@@ -52,4 +52,4 @@ public final class WhitespaceTokenizer extends AbstractTokenizer {
     return opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE.
         tokenizePos(sentence.getCoveredText());
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
index e4ed313..57bdb0d 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
@@ -38,4 +38,4 @@ public class AnnotationComparator implements Comparator<AnnotationFS>
   public int compare(AnnotationFS a, AnnotationFS b) {
     return a.getBegin() - b.getBegin();
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
index 57ae612..24c50b8 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
@@ -72,4 +72,4 @@ public final class ContainingConstraint implements FSMatchConstraint {
       && (containing.getEnd() >= annotation.getEnd());
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java b/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java
index 2b2febf..bbf2243 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java
@@ -36,4 +36,4 @@ public class ExceptionMessages {
   public static final String FEATURE_NOT_FOUND = "feature_not_found";
 
   public static final String WRONG_FEATURE_TYPE = "wrong_feature_type";
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/73cf5600/opennlp-uima/src/main/java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java
index e817b67..fcff77f 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpenNlpAnnotatorProcessException.java
@@ -36,4 +36,4 @@ public class OpenNlpAnnotatorProcessException extends
   public OpenNlpAnnotatorProcessException(Throwable t) {
     super(t);
   }
-}
\ No newline at end of file
+}


[29/50] [abbrv] opennlp git commit: OPENNLP-992: Distribution package should include example parameters file

Posted by jo...@apache.org.
OPENNLP-992: Distribution package should include example parameters file


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6cdca662
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6cdca662
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6cdca662

Branch: refs/heads/parser_regression
Commit: 6cdca662f7c7f44afb79ef6a83140089bfec1301
Parents: 9610781
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 21 13:47:29 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 opennlp-distr/src/main/assembly/bin.xml | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/6cdca662/opennlp-distr/src/main/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/src/main/assembly/bin.xml b/opennlp-distr/src/main/assembly/bin.xml
index 43d95f4..b1e27c8 100644
--- a/opennlp-distr/src/main/assembly/bin.xml
+++ b/opennlp-distr/src/main/assembly/bin.xml
@@ -87,6 +87,13 @@
 			<directoryMode>755</directoryMode>
 			<outputDirectory>bin</outputDirectory>
 		</fileSet>
+
+		<fileSet>
+			<directory>../opennlp-tools/lang</directory>
+			<fileMode>644</fileMode>
+			<directoryMode>755</directoryMode>
+			<outputDirectory>lang</outputDirectory>
+		</fileSet>
 		
 		<fileSet>
 			<directory>../opennlp-docs/target/docbkx/html</directory>
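
For reference, the files this fileSet copies out of ../opennlp-tools/lang are presumably the example training parameter files OPENNLP-992 refers to. A minimal sketch of reading such a file programmatically, assuming it uses the Java properties format that opennlp.tools.util.TrainingParameters understands (the path below is illustrative, not taken from this commit):

  package sketch;

  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;

  import opennlp.tools.util.TrainingParameters;

  public class LoadParamsSketch {

    public static void main(String[] args) throws IOException {
      // Hypothetical path inside the unpacked distribution; such files
      // typically hold entries like Algorithm, Iterations and Cutoff.
      try (InputStream in = new FileInputStream("lang/ml/PerceptronTrainerParams.txt")) {
        TrainingParameters params = new TrainingParameters(in);
        System.out.println(params.getSettings());
      }
    }
  }

The same file can also be passed unchanged to the command line trainers via their -params option.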


[26/50] [abbrv] opennlp git commit: OPENNLP-982: Allow loading of 1.5.x models

Posted by jo...@apache.org.
OPENNLP-982: Allow loading of 1.5.x models

This closes #129


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/ebb5b248
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/ebb5b248
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/ebb5b248

Branch: refs/heads/parser_regression
Commit: ebb5b24851706ec3684d4b0ddf4e35542984dfc5
Parents: eee4231
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri Feb 17 12:32:11 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:54 2017 +0200

----------------------------------------------------------------------
 .../src/main/java/opennlp/tools/util/model/BaseModel.java          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/ebb5b248/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
index 20acd9d..f70fb03 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
@@ -416,7 +416,7 @@ public abstract class BaseModel implements ArtifactProvider, Serializable {
         // Major and minor version must match, revision might be
         // this check allows for the use of models of n minor release behind current minor release
         if (Version.currentVersion().getMajor() != version.getMajor() ||
-            Version.currentVersion().getMinor() - 2 > version.getMinor()) {
+            Version.currentVersion().getMinor() - 3 > version.getMinor()) {
           throw new InvalidFormatException("Model version " + version + " is not supported by this ("
               + Version.currentVersion() + ") version of OpenNLP!");
         }
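
The effect of the relaxed check can be sketched as a standalone predicate (illustrative only, not part of the library): with the constant changed from 2 to 3, a model loads when the major versions match and its minor version is at most three releases behind the running version, so a 1.7.x runtime now accepts 1.4.x through 1.7.x models, which covers the 1.5.x models named in the issue.

  import opennlp.tools.util.Version;

  final class ModelCompatSketch {

    // Mirrors the patched condition in BaseModel.
    static boolean isLoadable(Version current, Version model) {
      return current.getMajor() == model.getMajor()
          && current.getMinor() - 3 <= model.getMinor();
    }

    public static void main(String[] args) {
      // Prints true on a 1.7.x runtime.
      System.out.println(isLoadable(Version.currentVersion(), new Version(1, 5, 0)));
    }
  }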


[06/50] [abbrv] opennlp git commit: OPENNLP-975: Add format support for CoNLL-U format

Posted by jo...@apache.org.
OPENNLP-975: Add format support for CoNLL-U format


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/212cf147
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/212cf147
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/212cf147

Branch: refs/heads/parser_regression
Commit: 212cf147820cf19e549586f9333c2b3803ae4896
Parents: f9db192
Author: Jörn Kottmann <jo...@apache.org>
Authored: Mon Feb 6 19:43:33 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 .../tools/cmdline/StreamFactoryRegistry.java    |   5 +
 .../formats/conllu/ConlluLemmaSampleStream.java |  57 ++++++++
 .../conllu/ConlluLemmaSampleStreamFactory.java  |  82 ++++++++++++
 .../formats/conllu/ConlluPOSSampleStream.java   |  56 ++++++++
 .../conllu/ConlluPOSSampleStreamFactory.java    |  82 ++++++++++++
 .../tools/formats/conllu/ConlluSentence.java    |  33 +++++
 .../tools/formats/conllu/ConlluStream.java      |  75 +++++++++++
 .../tools/formats/conllu/ConlluTagset.java      |  23 ++++
 .../tools/formats/conllu/ConlluWordLine.java    | 130 +++++++++++++++++++
 .../formats/conllu/ConlluWordLineTest.java      |  43 ++++++
 10 files changed, 586 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
index 56625a9..9977519 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
@@ -42,6 +42,8 @@ import opennlp.tools.formats.ad.ADPOSSampleStreamFactory;
 import opennlp.tools.formats.ad.ADSentenceSampleStreamFactory;
 import opennlp.tools.formats.ad.ADTokenSampleStreamFactory;
 import opennlp.tools.formats.brat.BratNameSampleStreamFactory;
+import opennlp.tools.formats.conllu.ConlluLemmaSampleStreamFactory;
+import opennlp.tools.formats.conllu.ConlluPOSSampleStreamFactory;
 import opennlp.tools.formats.convert.NameToSentenceSampleStreamFactory;
 import opennlp.tools.formats.convert.NameToTokenSampleStreamFactory;
 import opennlp.tools.formats.convert.POSToSentenceSampleStreamFactory;
@@ -110,6 +112,9 @@ public final class StreamFactoryRegistry {
 
     LetsmtSentenceStreamFactory.registerFactory();
     MosesSentenceSampleStreamFactory.registerFactory();
+
+    ConlluPOSSampleStreamFactory.registerFactory();
+    ConlluLemmaSampleStreamFactory.registerFactory();
   }
 
   public static final String DEFAULT_FORMAT = "opennlp";

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStream.java
new file mode 100644
index 0000000..0782120
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStream.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.lemmatizer.LemmaSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+public class ConlluLemmaSampleStream extends FilterObjectStream<ConlluSentence, LemmaSample> {
+
+  private final ConlluTagset tagset;
+
+  ConlluLemmaSampleStream(ObjectStream<ConlluSentence> samples, ConlluTagset tagset) {
+    super(samples);
+    this.tagset = tagset;
+  }
+
+  @Override
+  public LemmaSample read() throws IOException {
+    ConlluSentence sentence = samples.read();
+
+    if (sentence != null) {
+      List<String> tokens = new ArrayList<>();
+      List<String> tags = new ArrayList<>();
+      List<String> lemmas = new ArrayList<>();
+
+      for (ConlluWordLine line : sentence.getWordLines()) {
+        tokens.add(line.getForm());
+        tags.add(line.getPosTag(tagset));
+        lemmas.add(line.getLemma());
+      }
+
+      return new LemmaSample(tokens, tags, lemmas);
+    }
+
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamFactory.java
new file mode 100644
index 0000000..4806967
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluLemmaSampleStreamFactory.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.io.IOException;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.BasicFormatParams;
+import opennlp.tools.formats.AbstractSampleStreamFactory;
+import opennlp.tools.lemmatizer.LemmaSample;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ConlluLemmaSampleStreamFactory extends AbstractSampleStreamFactory<LemmaSample> {
+
+  public static final String CONLLU_FORMAT = "conllu";
+
+  interface Parameters extends BasicFormatParams {
+    @ArgumentParser.ParameterDescription(valueName = "tagset",
+        description = "u|x u for unified tags and x for language-specific part-of-speech tags")
+    @ArgumentParser.OptionalParameter(defaultValue = "u")
+    String getTagset();
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(LemmaSample.class,
+        CONLLU_FORMAT, new ConlluLemmaSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> ConlluLemmaSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<LemmaSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    ConlluTagset tagset;
+
+    switch (params.getTagset()) {
+      case "u":
+        tagset = ConlluTagset.U;
+        break;
+      case  "x":
+        tagset = ConlluTagset.X;
+        break;
+      default:
+        throw new TerminateToolException(-1, "Unkown tagset parameter: " + params.getTagset());
+    }
+
+    InputStreamFactory inFactory =
+        CmdLineUtil.createInputStreamFactory(params.getData());
+
+    try {
+      return new ConlluLemmaSampleStream(new ConlluStream(inFactory), tagset);
+    } catch (IOException e) {
+      // That will throw an exception
+      CmdLineUtil.handleCreateObjectStreamError(e);
+    }
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStream.java
new file mode 100644
index 0000000..28dddc0
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStream.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+public class ConlluPOSSampleStream extends FilterObjectStream<ConlluSentence, POSSample> {
+
+  private final ConlluTagset tagset;
+
+  ConlluPOSSampleStream(ObjectStream<ConlluSentence> samples, ConlluTagset tagset) {
+    super(samples);
+    this.tagset = Objects.requireNonNull(tagset);
+  }
+
+  @Override
+  public POSSample read() throws IOException {
+    ConlluSentence sentence = samples.read();
+
+    if (sentence != null) {
+      List<String> tokens = new ArrayList<>();
+      List<String> tags = new ArrayList<>();
+
+      for (ConlluWordLine line : sentence.getWordLines()) {
+        tokens.add(line.getForm());
+        tags.add(line.getPosTag(tagset));
+      }
+
+      return new POSSample(tokens, tags);
+    }
+
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamFactory.java
new file mode 100644
index 0000000..0f9d5f3
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluPOSSampleStreamFactory.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.io.IOException;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.BasicFormatParams;
+import opennlp.tools.formats.AbstractSampleStreamFactory;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ConlluPOSSampleStreamFactory extends AbstractSampleStreamFactory<POSSample> {
+
+  public static final String CONLLU_FORMAT = "conllu";
+
+  interface Parameters extends BasicFormatParams {
+    @ArgumentParser.ParameterDescription(valueName = "tagset",
+        description = "u|x u for unified tags and x for language-specific part-of-speech tags")
+    @ArgumentParser.OptionalParameter(defaultValue = "u")
+    String getTagset();
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(POSSample.class,
+        CONLLU_FORMAT, new ConlluPOSSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> ConlluPOSSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<POSSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    ConlluTagset tagset;
+
+    switch (params.getTagset()) {
+      case "u":
+        tagset = ConlluTagset.U;
+        break;
+      case  "x":
+        tagset = ConlluTagset.X;
+        break;
+      default:
+        throw new TerminateToolException(-1, "Unkown tagset parameter: " + params.getTagset());
+    }
+
+    InputStreamFactory inFactory =
+        CmdLineUtil.createInputStreamFactory(params.getData());
+
+    try {
+      return new ConlluPOSSampleStream(new ConlluStream(inFactory), tagset);
+    } catch (IOException e) {
+      // That will throw an exception
+      CmdLineUtil.handleCreateObjectStreamError(e);
+    }
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluSentence.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluSentence.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluSentence.java
new file mode 100644
index 0000000..5d92d89
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluSentence.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.util.List;
+
+public class ConlluSentence {
+
+  private List<ConlluWordLine> wordLines;
+
+  ConlluSentence(List<ConlluWordLine> wordLines) {
+    this.wordLines = wordLines;
+  }
+
+  public List<ConlluWordLine> getWordLines() {
+    return wordLines;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java
new file mode 100644
index 0000000..873a9ed
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.ParagraphStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * The CoNLL-U format is specified here:
+ * http://universaldependencies.org/format.html
+ */
+public class ConlluStream implements ObjectStream<ConlluSentence> {
+  private final ObjectStream<String> sentenceStream;
+
+  public ConlluStream(InputStreamFactory in) throws IOException {
+    this.sentenceStream = new ParagraphStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
+  }
+
+  @Override
+  public ConlluSentence read() throws IOException {
+    String sentence = sentenceStream.read();
+
+    if (sentence != null) {
+      List<ConlluWordLine> wordLines = new ArrayList<>();
+
+      BufferedReader reader = new BufferedReader(new StringReader(sentence));
+
+      String line;
+      while ((line = reader.readLine())  != null) {
+        // # indicates a comment line and should be skipped
+        if (!line.trim().startsWith("#")) {
+          wordLines.add(new ConlluWordLine(line));
+        }
+      }
+
+      return new ConlluSentence(wordLines);
+    }
+
+    return null;
+  }
+
+  @Override
+  public void close() throws IOException {
+    sentenceStream.close();
+  }
+
+  @Override
+  public void reset() throws IOException, UnsupportedOperationException {
+    sentenceStream.reset();
+  }
+}

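For illustration, a short sketch of reading a CoNLL-U file with the new stream (not
part of this commit). The file name is a placeholder and the use of
MarkableFileInputStreamFactory is an assumption; note that ConlluTagset is
package-private, so code like this would live in opennlp.tools.formats.conllu or go
through ConlluPOSSampleStream instead.

  InputStreamFactory in =
      new MarkableFileInputStreamFactory(new File("train.conllu"));

  ObjectStream<ConlluSentence> sentences = new ConlluStream(in);

  ConlluSentence sentence;
  while ((sentence = sentences.read()) != null) {
    for (ConlluWordLine word : sentence.getWordLines()) {
      // print the surface form together with its universal POS tag
      System.out.println(word.getForm() + "\t" + word.getPosTag(ConlluTagset.U));
    }
  }
  sentences.close();
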
http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluTagset.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluTagset.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluTagset.java
new file mode 100644
index 0000000..f49f3fd
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluTagset.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+enum ConlluTagset {
+  U,
+  X
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluWordLine.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluWordLine.java b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluWordLine.java
new file mode 100644
index 0000000..9881bf1
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluWordLine.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import opennlp.tools.util.InvalidFormatException;
+
+public class ConlluWordLine {
+
+  private final String id;
+  private final String form;
+  private final String lemma;
+  private final String uPosTag;
+  private final String xPosTag;
+  private final String feats;
+  private final String head;
+  private final String deprel;
+  private final String deps;
+  private final String misc;
+
+  ConlluWordLine(String line) throws InvalidFormatException {
+
+    String[] fields = line.split("\t");
+
+    if (fields.length != 10) {
+      throw new InvalidFormatException("Line must have exactly 10 fields");
+    }
+
+    id = fields[0];
+    form = fields[1];
+    lemma = fields[2];
+    uPosTag = fields[3];
+    xPosTag = fields[4];
+    feats = fields[5];
+    head = fields[6];
+    deprel = fields[7];
+    deps = fields[8];
+    misc = fields[9];
+  }
+
+  /**
+   * Retrieves the word index, an integer starting at 1 for each new sentence;
+   * it may be a range for multiword tokens or a decimal number for empty nodes.
+   */
+  public String getId() {
+    return id;
+  }
+
+  /**
+   * Retrieve the word form or punctuation symbol.
+   */
+  public String getForm() {
+    return form;
+  }
+
+  /**
+   * Retrieve the lemma or stem of the word form.
+   */
+  public String getLemma() {
+    return lemma;
+  }
+
+  /**
+   * Retrieve the Universal part-of-speech tag or the language-specific part-of-speech tag;
+   * underscore if not available.
+   *
+   * @param tagset the type of tag to retrieve, either universal (u) or language-specific (x)
+   */
+  public String getPosTag(ConlluTagset tagset) {
+    switch (tagset) {
+      case U:
+        return uPosTag;
+      case X:
+        return xPosTag;
+      default:
+        throw new IllegalStateException("Unexpected tagset value: " + tagset);
+    }
+  }
+
+  /**
+   * Retrieve list of morphological features from the universal feature inventory or from a
+   * defined language-specific extension; underscore if not available.
+   */
+  public String getFeats() {
+    return feats;
+  }
+
+  /**
+   * Head of the current word, which is either a value of ID or zero (0).
+   */
+  public String getHead() {
+    return head;
+  }
+
+  /**
+   * Universal dependency relation to the HEAD (root iff HEAD = 0) or a
+   * defined language-specific subtype of one.
+   */
+  public String getDeprel() {
+    return deprel;
+  }
+
+  /**
+   * Enhanced dependency graph in the form of a list of head-deprel pairs.
+   */
+  public String getDeps() {
+    return deps;
+  }
+
+  /**
+   * Retrieve any other annotation.
+   */
+  public String getMisc() {
+    return misc;
+  }
+}

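To make the id documentation above concrete, a hedged sketch (the values are
illustrative and not from the commit; the package-private constructor means this code
would sit next to the unit test below and declare InvalidFormatException):

  // a regular token with an integer id, as in the unit test below
  ConlluWordLine word = new ConlluWordLine(
      "12\tHänden\tHand\tNOUN\tNN\tCase=Dat|Number=Plur\t5\tnmod\t_\t_");

  // a multiword token uses a range id; unavailable fields are underscores
  ConlluWordLine multiword = new ConlluWordLine(
      "1-2\tvámonos\t_\t_\t_\t_\t_\t_\t_\t_");

  word.getId();        // "12"
  multiword.getId();   // "1-2"
  multiword.getForm(); // "vámonos"; its lemma and tags are "_" (not available)
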
http://git-wip-us.apache.org/repos/asf/opennlp/blob/212cf147/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluWordLineTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluWordLineTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluWordLineTest.java
new file mode 100644
index 0000000..4676f6f
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/conllu/ConlluWordLineTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.conllu;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.InvalidFormatException;
+
+public class ConlluWordLineTest {
+
+  @Test
+  public void testParseLine() throws InvalidFormatException {
+    ConlluWordLine line = new ConlluWordLine(
+        "12\tHänden\tHand\tNOUN\tNN\tCase=Dat|Number=Plur\t5\tnmod\t_\t_");
+
+    Assert.assertEquals("12", line.getId());
+    Assert.assertEquals("Händen", line.getForm());
+    Assert.assertEquals("Hand", line.getLemma());
+    Assert.assertEquals("NOUN", line.getPosTag(ConlluTagset.U));
+    Assert.assertEquals("NN", line.getPosTag(ConlluTagset.X));
+    Assert.assertEquals("Case=Dat|Number=Plur", line.getFeats());
+    Assert.assertEquals("5", line.getHead());
+    Assert.assertEquals("nmod", line.getDeprel());
+    Assert.assertEquals("_", line.getDeps());
+    Assert.assertEquals("_", line.getMisc());
+  }
+}


[50/50] [abbrv] opennlp git commit: OPENNLP-1010: Fix NaiveBayes model writer

Posted by jo...@apache.org.
OPENNLP-1010: Fix NaiveBayes model writer

The previous sortValues method was based on the Perceptron implementation, but for some reason
it was not working for NaiveBayes. Changing it to the GIS implementation fixed the problem.

this closes apache/opennlp#154

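A minimal round-trip sketch of the behaviour this fix restores (mirroring the new test
below; the training DataIndexer is assumed to be already initialized and indexed):

  // train a model, persist it, read it back and compare the scores
  NaiveBayesModel model =
      (NaiveBayesModel) new NaiveBayesTrainer().trainModel(indexer);

  File modelFile = File.createTempFile("nb-model", ".bin");
  NaiveBayesModelWriter writer = new BinaryNaiveBayesModelWriter(model, modelFile);
  writer.persist();

  NaiveBayesModelReader reader = new BinaryNaiveBayesModelReader(modelFile);
  reader.checkModelType();
  NaiveBayesModel restored = (NaiveBayesModel) reader.constructModel();

  // with the corrected sortValues the two models score events identically
  double[] expected = model.eval(new String[] {"bow=united", "bow=nations"});
  double[] actual = restored.eval(new String[] {"bow=united", "bow=nations"});
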

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/3ac2fb37
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/3ac2fb37
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/3ac2fb37

Branch: refs/heads/parser_regression
Commit: 3ac2fb37750595dfff573bb813b48a9d889052a2
Parents: ef4c667
Author: William D C M SILVA <co...@apache.org>
Authored: Fri Apr 14 09:35:36 2017 -0300
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:58 2017 +0200

----------------------------------------------------------------------
 .../ml/naivebayes/NaiveBayesModelWriter.java    |  71 ++++---
 .../NaiveBayesSerializedCorrectnessTest.java    | 184 +++++++++++++++++++
 2 files changed, 225 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
index bbb6eee..510bf76 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModelWriter.java
@@ -55,44 +55,55 @@ public abstract class NaiveBayesModelWriter extends AbstractModelWriter {
     }
   }
 
+
   protected ComparablePredicate[] sortValues() {
-    ComparablePredicate[] sortPreds;
-    ComparablePredicate[] tmpPreds = new ComparablePredicate[PARAMS.length];
-    int[] tmpOutcomes = new int[numOutcomes];
-    double[] tmpParams = new double[numOutcomes];
-    int numPreds = 0;
-    //remove parameters with 0 weight and predicates with no parameters
-    for (int pid = 0; pid < PARAMS.length; pid++) {
-      int numParams = 0;
-      double[] predParams = PARAMS[pid].getParameters();
-      int[] outcomePattern = PARAMS[pid].getOutcomes();
-      for (int pi = 0; pi < predParams.length; pi++) {
-        if (predParams[pi] != 0d) {
-          tmpOutcomes[numParams] = outcomePattern[pi];
-          tmpParams[numParams] = predParams[pi];
-          numParams++;
-        }
-      }
 
-      int[] activeOutcomes = new int[numParams];
-      double[] activeParams = new double[numParams];
+    ComparablePredicate[] sortPreds = new ComparablePredicate[PARAMS.length];
 
-      for (int pi = 0; pi < numParams; pi++) {
-        activeOutcomes[pi] = tmpOutcomes[pi];
-        activeParams[pi] = tmpParams[pi];
-      }
-      if (numParams != 0) {
-        tmpPreds[numPreds] = new ComparablePredicate(PRED_LABELS[pid], activeOutcomes, activeParams);
-        numPreds++;
-      }
+    int numParams = 0;
+    for (int pid = 0; pid < PARAMS.length; pid++) {
+      int[] predkeys = PARAMS[pid].getOutcomes();
+      // Arrays.sort(predkeys);
+      int numActive = predkeys.length;
+      double[] activeParams = PARAMS[pid].getParameters();
+
+      numParams += numActive;
+      /*
+       * double[] activeParams = new double[numActive];
+       *
+       * int id = 0; for (int i=0; i < predkeys.length; i++) { int oid =
+       * predkeys[i]; activeOutcomes[id] = oid; activeParams[id] =
+       * PARAMS[pid].getParams(oid); id++; }
+       */
+      sortPreds[pid] = new ComparablePredicate(PRED_LABELS[pid],
+          predkeys, activeParams);
     }
-    System.err.println("Compressed " + PARAMS.length + " parameters to " + numPreds);
-    sortPreds = new ComparablePredicate[numPreds];
-    System.arraycopy(tmpPreds, 0, sortPreds, 0, numPreds);
+
     Arrays.sort(sortPreds);
     return sortPreds;
   }
 
+  protected List<List<ComparablePredicate>> compressOutcomes(ComparablePredicate[] sorted) {
+    List<List<ComparablePredicate>> outcomePatterns = new ArrayList<>();
+    if (sorted.length > 0) {
+      ComparablePredicate cp = sorted[0];
+      List<ComparablePredicate> newGroup = new ArrayList<>();
+      for (int i = 0; i < sorted.length; i++) {
+        if (cp.compareTo(sorted[i]) == 0) {
+          newGroup.add(sorted[i]);
+        } else {
+          cp = sorted[i];
+          outcomePatterns.add(newGroup);
+          newGroup = new ArrayList<>();
+          newGroup.add(sorted[i]);
+        }
+      }
+      outcomePatterns.add(newGroup);
+    }
+    return outcomePatterns;
+  }
+
+
 
   protected List<List<ComparablePredicate>> computeOutcomePatterns(ComparablePredicate[] sorted) {
     ComparablePredicate cp = sorted[0];

http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ac2fb37/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java
new file mode 100644
index 0000000..0146885
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesSerializedCorrectnessTest.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.naivebayes;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import opennlp.tools.ml.AbstractTrainer;
+import opennlp.tools.ml.model.AbstractDataIndexer;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.TwoPassDataIndexer;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * Tests that a persisted and re-read naive bayes model produces the same outcomes as the in-memory model
+ */
+public class NaiveBayesSerializedCorrectnessTest {
+
+  private DataIndexer testDataIndexer;
+
+  @Before
+  public void initIndexer() {
+    TrainingParameters trainingParameters = new TrainingParameters();
+    trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, "1");
+    trainingParameters.put(AbstractDataIndexer.SORT_PARAM, "false");
+    testDataIndexer = new TwoPassDataIndexer();
+    testDataIndexer.init(trainingParameters, new HashMap<>());
+  }
+
+  @Test
+  public void testNaiveBayes1() throws IOException {
+
+    testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
+    NaiveBayesModel model1 =
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
+
+    NaiveBayesModel model2 = persistedModel(model1);
+
+    String label = "politics";
+    String[] context = {"bow=united", "bow=nations"};
+    Event event = new Event(label, context);
+
+    testModelOutcome(model1, model2, event);
+
+  }
+
+  @Test
+  public void testNaiveBayes2() throws IOException {
+
+    testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
+    NaiveBayesModel model1 =
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
+
+    NaiveBayesModel model2 = persistedModel(model1);
+
+    String label = "sports";
+    String[] context = {"bow=manchester", "bow=united"};
+    Event event = new Event(label, context);
+
+    testModelOutcome(model1, model2, event);
+
+  }
+
+  @Test
+  public void testNaiveBayes3() throws IOException {
+
+    testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
+    NaiveBayesModel model1 =
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
+
+    NaiveBayesModel model2 = persistedModel(model1);
+
+    String label = "politics";
+    String[] context = {"bow=united"};
+    Event event = new Event(label, context);
+
+    testModelOutcome(model1, model2, event);
+
+  }
+
+  @Test
+  public void testNaiveBayes4() throws IOException {
+
+    testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
+    NaiveBayesModel model1 =
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
+
+    NaiveBayesModel model2 = persistedModel(model1);
+
+    String label = "politics";
+    String[] context = {};
+    Event event = new Event(label, context);
+
+    testModelOutcome(model1, model2, event);
+
+  }
+
+
+  @Test
+  public void testPlainTextModel() throws IOException {
+    testDataIndexer.index(NaiveBayesCorrectnessTest.createTrainingStream());
+    NaiveBayesModel model1 =
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(testDataIndexer);
+
+
+    StringWriter sw1 = new StringWriter();
+
+    NaiveBayesModelWriter modelWriter =
+        new PlainTextNaiveBayesModelWriter(model1, new BufferedWriter(sw1));
+    modelWriter.persist();
+
+    NaiveBayesModelReader reader =
+        new PlainTextNaiveBayesModelReader(new BufferedReader(new StringReader(sw1.toString())));
+    reader.checkModelType();
+
+    NaiveBayesModel model2 = (NaiveBayesModel)reader.constructModel();
+
+    StringWriter sw2 = new StringWriter();
+    modelWriter = new PlainTextNaiveBayesModelWriter(model2, new BufferedWriter(sw2));
+    modelWriter.persist();
+
+    System.out.println(sw1.toString());
+    Assert.assertEquals(sw1.toString(), sw2.toString());
+
+  }
+
+  protected static NaiveBayesModel persistedModel(NaiveBayesModel model) throws IOException {
+    Path tempFilePath = Files.createTempFile("ptnb-", ".bin");
+    File file = tempFilePath.toFile();
+    NaiveBayesModelWriter modelWriter = new BinaryNaiveBayesModelWriter(model, tempFilePath.toFile());
+    modelWriter.persist();
+    NaiveBayesModelReader reader = new BinaryNaiveBayesModelReader(file);
+    reader.checkModelType();
+    return (NaiveBayesModel)reader.constructModel();
+  }
+
+  protected static void testModelOutcome(NaiveBayesModel model1, NaiveBayesModel model2, Event event) {
+    String[] labels1 = extractLabels(model1);
+    String[] labels2 = extractLabels(model2);
+
+    Assert.assertArrayEquals(labels1, labels2);
+
+    double[] outcomes1 = model1.eval(event.getContext());
+    double[] outcomes2 = model2.eval(event.getContext());
+
+    Assert.assertArrayEquals(outcomes1, outcomes2, 0.000000000001);
+
+  }
+
+  private static String[] extractLabels(NaiveBayesModel model) {
+    String[] labels = new String[model.getNumOutcomes()];
+    for (int i = 0; i < model.getNumOutcomes(); i++) {
+      labels[i] = model.getOutcome(i);
+    }
+    return labels;
+  }
+}


[07/50] [abbrv] opennlp git commit: [maven-release-plugin] prepare for next development iteration

Posted by jo...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f9db192d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f9db192d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f9db192d

Branch: refs/heads/parser_regression
Commit: f9db192d33138c9d5048a2349f06e70b13719ff6
Parents: 4b8ebad
Author: smarthi <sm...@apache.org>
Authored: Wed Feb 1 11:03:42 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml            | 2 +-
 opennlp-docs/pom.xml             | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml            | 2 +-
 opennlp-uima/pom.xml             | 2 +-
 pom.xml                          | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 53517ca..1633deb 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2</version>
+		<version>1.7.3-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index ae86021..c0a57c3 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2</version>
+		<version>1.7.3-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-docs/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index b765d91..fd2b0d1 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   <parent>
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.2</version>
+	<version>1.7.3-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-morfologik-addon/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 50844f2..1c384c7 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.2</version>
+		<version>1.7.3-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 6cdb688..22fc017 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/opennlp-uima/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 39f1040..070fec9 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@
 	<parent>
 	    <groupId>org.apache.opennlp</groupId>
 	    <artifactId>opennlp</artifactId>
-	    <version>1.7.2</version>
+	    <version>1.7.3-SNAPSHOT</version>
 	    <relativePath>../pom.xml</relativePath>
     </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f9db192d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index bbb48c8..98acfb1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.2</version>
+	<version>1.7.3-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<name>Apache OpenNLP Reactor</name>
@@ -40,7 +40,7 @@
 		<connection>scm:git:git@github.com:apache/opennlp.git</connection>
 		<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/opennlp.git</developerConnection>
 		<url>https://git-wip-us.apache.org/repos/asf?p=opennlp.git</url>
-		<tag>opennlp-1.7.2</tag>
+		<tag>HEAD</tag>
 	</scm>
 
 	<mailingLists>


[13/50] [abbrv] opennlp git commit: NoJira: Update pom and jira version to 1.8.0

Posted by jo...@apache.org.
NoJira: Update pom and jira version to 1.8.0


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/daa9fcaa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/daa9fcaa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/daa9fcaa

Branch: refs/heads/parser_regression
Commit: daa9fcaa0722f59222c374028e2cee097a29fa12
Parents: ac787a4
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Feb 9 18:41:20 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 opennlp-brat-annotator/pom.xml   | 2 +-
 opennlp-distr/pom.xml            | 4 ++--
 opennlp-docs/pom.xml             | 2 +-
 opennlp-morfologik-addon/pom.xml | 2 +-
 opennlp-tools/pom.xml            | 2 +-
 opennlp-uima/pom.xml             | 2 +-
 pom.xml                          | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
index 1633deb..6c7be0d 100644
--- a/opennlp-brat-annotator/pom.xml
+++ b/opennlp-brat-annotator/pom.xml
@@ -17,7 +17,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.8.0-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index c0a57c3..1ce102d 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.8.0-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 
@@ -127,7 +127,7 @@
               <phase>generate-resources</phase> 
               <goals><goal>jira-report</goal></goals>
                 <configuration>
-                  <fixVersionIds>12339150</fixVersionIds>
+                  <fixVersionIds>12339249</fixVersionIds>
                   <outputDirectory>${basedir}/target/issuesFixed/</outputDirectory>
                   <maxEntries>1000</maxEntries> <!-- hopefully, bigger than ever needed -->
                 </configuration>

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-docs/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/pom.xml b/opennlp-docs/pom.xml
index fd2b0d1..fbf0b5c 100644
--- a/opennlp-docs/pom.xml
+++ b/opennlp-docs/pom.xml
@@ -24,7 +24,7 @@
   <parent>
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.3-SNAPSHOT</version>
+	<version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-morfologik-addon/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
index 1c384c7..c46f101 100644
--- a/opennlp-morfologik-addon/pom.xml
+++ b/opennlp-morfologik-addon/pom.xml
@@ -24,7 +24,7 @@
 	<parent>
 		<groupId>org.apache.opennlp</groupId>
 		<artifactId>opennlp</artifactId>
-		<version>1.7.3-SNAPSHOT</version>
+		<version>1.8.0-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-tools/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index c7e9624..d2630c9 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.opennlp</groupId>
     <artifactId>opennlp</artifactId>
-    <version>1.7.3-SNAPSHOT</version>
+    <version>1.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/opennlp-uima/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 070fec9..7cfdb72 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -25,7 +25,7 @@
 	<parent>
 	    <groupId>org.apache.opennlp</groupId>
 	    <artifactId>opennlp</artifactId>
-	    <version>1.7.3-SNAPSHOT</version>
+	    <version>1.8.0-SNAPSHOT</version>
 	    <relativePath>../pom.xml</relativePath>
     </parent>
     

http://git-wip-us.apache.org/repos/asf/opennlp/blob/daa9fcaa/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 98acfb1..268a54e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
 
 	<groupId>org.apache.opennlp</groupId>
 	<artifactId>opennlp</artifactId>
-	<version>1.7.3-SNAPSHOT</version>
+	<version>1.8.0-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<name>Apache OpenNLP Reactor</name>


[48/50] [abbrv] opennlp git commit: OPENNLP-1015: Add tests for DataIndexers

Posted by jo...@apache.org.
OPENNLP-1015: Add tests for DataIndexers

Closes #152


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/7589af69
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/7589af69
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/7589af69

Branch: refs/heads/parser_regression
Commit: 7589af69ea8a73326bed5e2f5b5c0445f95112eb
Parents: 5eb8ff8
Author: koji <ko...@rondhuit.com>
Authored: Fri Apr 7 21:50:02 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:57 2017 +0200

----------------------------------------------------------------------
 .../tools/ml/model/OnePassDataIndexerTest.java  |  64 ++++++++++
 .../model/OnePassRealValueDataIndexerTest.java  | 116 +++++++++++++++++++
 .../ml/model/SimpleEventStreamBuilder.java      |  76 ++++++++++++
 .../tools/ml/model/TwoPassDataIndexerTest.java  |  64 ++++++++++
 4 files changed, 320 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
new file mode 100644
index 0000000..e629e7a
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class OnePassDataIndexerTest {
+
+  @Test
+  public void testIndex() throws IOException {
+    // He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+              " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+              " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+              " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+              " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+              " ppo=org-cont")
+        .build();
+
+    DataIndexer indexer = new OnePassDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+    indexer.index(eventStream);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertNull(indexer.getValues());
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
new file mode 100644
index 0000000..ab9eda3
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class OnePassRealValueDataIndexerTest {
+
+  DataIndexer indexer;
+
+  @Before
+  public void setUp() throws Exception {
+    indexer = new OnePassRealValueDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+  }
+
+  @Test
+  public void testIndex() throws IOException {
+    // He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+                    " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+                    " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+                " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+                    " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+                    " ppo=org-cont")
+        .build();
+
+    indexer.index(eventStream);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertEquals(3, indexer.getValues().length);
+    Assert.assertNull(indexer.getValues()[0]);
+    Assert.assertNull(indexer.getValues()[1]);
+    Assert.assertNull(indexer.getValues()[2]);
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+  }
+
+  @Test
+  public void testIndexValues() throws IOException {
+    // He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he;0.1 n1w=belongs;0.2 n2w=to;0.1 po=other;0.1" +
+                " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
+        .add("other/w=belongs;0.1 p1w=he;0.2 n1w=to;0.1 n2w=apache;0.1" +
+                " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
+        .add("other/w=to;0.1 p1w=belongs;0.2 p2w=he;0.1 n1w=apache;0.1" +
+                " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
+        .add("org-start/w=apache;0.1 p1w=to;0.2 p2w=belongs;0.1 n1w=software;0.1 n2w=foundation;0.1" +
+                " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
+        .add("org-cont/w=software;0.1 p1w=apache;0.2 p2w=to;0.1 n1w=foundation;0.1" +
+                " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
+        .add("org-cont/w=foundation;0.1 p1w=software;0.2 p2w=apache;0.1 n1w=.;0.1 po=org-cont;0.1" +
+                " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
+        .add("other/w=.;0.1 p1w=foundation;0.1 p2w=software;0.1 po=org-cont;0.1 pow=org-cont,.;0.1" +
+                " powf=org-cont,other;0.1 ppo=org-cont;0.1")
+        .build();
+
+    indexer.index(eventStream);
+    System.out.println(indexer);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertEquals(3, indexer.getValues().length);
+    final float delta = 0.001F;
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[0], delta);
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[1], delta);
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[2], delta);
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
new file mode 100644
index 0000000..49fa242
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.util.ObjectStream;
+
+public class SimpleEventStreamBuilder {
+
+  private final List<Event> eventList = new ArrayList<>();
+  private int pos = 0;
+
+  /*
+   * The format of an event string should look like:
+   * without values) other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic
+   * with values) other/w=he;0.5 n1w=belongs;0.4 n2w=to;0.3 po=other;0.5 pow=other,He;0.25 powf=other,ic;0.5
+   */
+  public SimpleEventStreamBuilder add(String event) {
+    String[] ss = event.split("/");
+    if (ss.length != 2) {
+      throw new RuntimeException(String.format("format error of the event \"%s\"", event));
+    }
+
+    // look for context (and values)
+    String[] cvPairs = ss[1].split("\\s+");
+    if (cvPairs[0].contains(";")) { // has values?
+      String[] context = new String[cvPairs.length];
+      float[] values = new float[cvPairs.length];
+      for (int i = 0; i < cvPairs.length; i++) {
+        String[] pair = cvPairs[i].split(";");
+        if (pair.length != 2) {
+          throw new RuntimeException(String.format("format error of the event \"%s\". "
+                       + "\"%s\" doesn't have a value", event, cvPairs[i]));
+        }
+        context[i] = pair[0];
+        values[i] = Float.parseFloat(pair[1]);
+      }
+      eventList.add(new Event(ss[0], context, values));
+    }
+    else {
+      eventList.add(new Event(ss[0], cvPairs));
+    }
+
+    return this;
+  }
+
+  public ObjectStream<Event> build() {
+    return new ObjectStream<Event>() {
+      @Override
+      public Event read() throws IOException {
+        if (eventList.size() <= pos) {
+          return null;
+        }
+        return eventList.get(pos++);
+      }
+    };
+  }
+}

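As a quick illustration of the event format described in the builder's comment (a
hedged sketch, not part of the commit), a single event string with values maps onto an
Event like this:

  ObjectStream<Event> stream = new SimpleEventStreamBuilder()
      .add("other/w=he;0.5 n1w=belongs;0.4")  // outcome "other", two weighted features
      .build();

  Event e = stream.read();
  // e.getOutcome() -> "other"
  // e.getContext() -> {"w=he", "n1w=belongs"}
  // e.getValues()  -> {0.5f, 0.4f}
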
http://git-wip-us.apache.org/repos/asf/opennlp/blob/7589af69/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
new file mode 100644
index 0000000..c246936
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class TwoPassDataIndexerTest {
+
+  @Test
+  public void testIndex() throws IOException {
+    // He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+                    " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+                    " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+                    " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+                    " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+                    " ppo=org-cont")
+        .build();
+
+    DataIndexer indexer = new TwoPassDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+    indexer.index(eventStream);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertNull(indexer.getValues());
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+  }
+}


[39/50] [abbrv] opennlp git commit: this closes apache/opennlp#147

Posted by jo...@apache.org.
this closes apache/opennlp#147


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/61edfe54
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/61edfe54
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/61edfe54

Branch: refs/heads/parser_regression
Commit: 61edfe5402d187ea5c78445b9335b0577f5bc6e0
Parents: 178aeb3
Author: smarthi <sm...@apache.org>
Authored: Sun Mar 26 12:40:20 2017 -0400
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 opennlp-docs/src/docbkx/namefinder.xml                             | 2 +-
 .../src/main/java/opennlp/tools/ml/maxent/io/package.html          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/61edfe54/opennlp-docs/src/docbkx/namefinder.xml
----------------------------------------------------------------------
diff --git a/opennlp-docs/src/docbkx/namefinder.xml b/opennlp-docs/src/docbkx/namefinder.xml
index 1ecb13c..1e72a82 100644
--- a/opennlp-docs/src/docbkx/namefinder.xml
+++ b/opennlp-docs/src/docbkx/namefinder.xml
@@ -388,7 +388,7 @@ new NameFinderME(model);]]>
 </generators>]]>
 				 </programlisting>
 		    The root element must be generators, each sub-element adds a feature generator to the configuration.
-		    The sample xml is constains additional feature generators with respect to the API defined above.
+		    The sample xml contains additional feature generators with respect to the API defined above.
 			</para>
 			<para>
 			The following table shows the supported elements:

http://git-wip-us.apache.org/repos/asf/opennlp/blob/61edfe54/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html
index 5e1a59a..c6fa27b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html
@@ -28,6 +28,6 @@
 <body bgcolor="white">
 
 Provides the I/O functionality of the maxent package including reading
-and writting models in several formats.
+and writing models in several formats.
 </body>
 </html>


[04/50] [abbrv] opennlp git commit: NoJira: Run coveralls:report after build

Posted by jo...@apache.org.
NoJira: Run coveralls:report after build


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/6f332610
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/6f332610
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/6f332610

Branch: refs/heads/parser_regression
Commit: 6f332610eb7a4fcc0b978b90f1fc9c8b253aa2b8
Parents: 51cd809
Author: Jörn Kottmann <jo...@apache.org>
Authored: Tue Feb 7 18:49:39 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:51 2017 +0200

----------------------------------------------------------------------
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/6f332610/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 81bf0f7..b4c83ad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,4 +17,4 @@ before_install:
 script: mvn clean install
 
 after_success:
-  - mvn clean test -Pjacoco jacoco:report
\ No newline at end of file
+  - mvn clean test -Pjacoco jacoco:report coveralls:report


[12/50] [abbrv] opennlp git commit: NoJira: Fix badge rendering

Posted by jo...@apache.org.
NoJira: Fix badge rendering


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/a2049d6f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/a2049d6f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/a2049d6f

Branch: refs/heads/parser_regression
Commit: a2049d6fa4715b681a8b1ab7fb70a2c8923f8975
Parents: 6ecc17e
Author: smarthi <sm...@apache.org>
Authored: Tue Feb 7 22:51:13 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:52 2017 +0200

----------------------------------------------------------------------
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/a2049d6f/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 2d31eb1..02b146a 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Welcome to Apache OpenNLP!
 
 [![Build Status](https://api.travis-ci.org/apache/opennlp.svg?branch=master)](https://travis-ci.org/apache/opennlp)
 [![Coverage Status](https://coveralls.io/repos/github/apache/opennlp/badge.svg?branch=master)](https://coveralls.io/github/apache/opennlp?branch=master)
-[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic])](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/org.apache.opennlp/opennlp)
 [![Documentation Status](https://img.shields.io/:docs-latest-green.svg)](http://opennlp.apache.org/documentation.html)
 [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE)
 [![Twitter Follow](https://img.shields.io/twitter/follow/ApacheOpennlp.svg?style=social)](https://twitter.com/ApacheOpenNLP)


[35/50] [abbrv] opennlp git commit: OPENNLP-998: Fixing Maven build on MacOS

Posted by jo...@apache.org.
OPENNLP-998: Fixing Maven build on MacOS

This closes #136


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/711d70b9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/711d70b9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/711d70b9

Branch: refs/heads/parser_regression
Commit: 711d70b9e4a5517911178c0e0a438487690552cd
Parents: d3c16d5
Author: Madhav Sharan <go...@gmail.com>
Authored: Sat Mar 4 11:16:25 2017 -0800
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:55 2017 +0200

----------------------------------------------------------------------
 opennlp-distr/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/711d70b9/opennlp-distr/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml
index 1ce102d..3f838cd 100644
--- a/opennlp-distr/pom.xml
+++ b/opennlp-distr/pom.xml
@@ -72,7 +72,7 @@
 							     many file have more than 100 chars.
 							     Right now only javadoc files are too long.
 							 -->
-							 <tarLongFileMode>gnu</tarLongFileMode>
+							 <tarLongFileMode>posix</tarLongFileMode>
 							 
 							 <finalName>apache-opennlp-${project.version}</finalName>
 						</configuration>


[40/50] [abbrv] opennlp git commit: OPENNLP-1003: Write a test case for the BioCodec class

Posted by jo...@apache.org.
OPENNLP-1003: Write a test case for the BioCodec class

This closes #141


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/17493d1c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/17493d1c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/17493d1c

Branch: refs/heads/parser_regression
Commit: 17493d1cd7505441a363846fbd0a7ec2a8d67a06
Parents: fc10d2e
Author: Peter Thygesen <pe...@gmail.com>
Authored: Tue Mar 14 23:22:24 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:56 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/namefind/BioCodecTest.java    | 263 +++++++++++++++++++
 1 file changed, 263 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/17493d1c/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
new file mode 100644
index 0000000..c894742
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * This is the test class for {@link BioCodec}.
+ */
+public class BioCodecTest {
+
+  private static final BioCodec codec = new BioCodec();
+
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BioCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BioCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_START = C_TYPE + "-" + BioCodec.START;
+
+  private static final String OTHER = BioCodec.OTHER;
+
+  @Test
+  public void testEncodeNoNames() {
+    NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
+    String[] expected = new String[] { OTHER, OTHER, OTHER, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeSingleTokenSpan() {
+    String[] sentence = "I called Julie again.".split(" ");
+    Span[] spans = new Span[] { new Span(2,3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] span = new Span[] { new Span(2,4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, span, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
+        "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpanNoType() {
+    final String DEFAULT_START = "default" + "-" + BioCodec.START;
+    final String DEFAULT_CONTINUE = "default" + "-" + BioCodec.CONTINUE;
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] span = new Span[] { new Span(2,4, null)};
+    NameSample nameSample = new NameSample(sentence, span, true);
+    String[] expected = new String[] {OTHER, OTHER, DEFAULT_START, DEFAULT_CONTINUE, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
+        "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeAdjacentSingleSpans() {
+    String[] sentence = "something PersonA PersonB Something".split(" ");
+    Span[] span = new Span[] { new Span(1,2, A_TYPE), new Span(2, 3, A_TYPE) };
+    NameSample nameSample = new NameSample(sentence, span, true);
+    String[] expected = new String[] {OTHER, A_START, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testEncodeAdjacentSpans() {
+    String[] sentence = "something PersonA PersonA PersonB Something".split(" ");
+    Span[] span = new Span[] { new Span(1,3, A_TYPE), new Span(3, 4, A_TYPE) };
+    NameSample nameSample = new NameSample(sentence, span, true);
+    String[] expected = new String[] {OTHER, A_START, A_CONTINUE, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testCreateSequenceValidator() {
+    Assert.assertTrue(codec.createSequenceValidator() instanceof NameFinderSequenceValidator);
+  }
+
+
+  @Test
+  public void testDecodeEmpty() {
+    Span[] expected = new Span[] {};
+    Span[] actual = codec.decode(new ArrayList<String>());
+    Assert.assertArrayEquals(expected, actual);
+  }
+  /**
+   * Start, Other
+   */
+  @Test
+  public void testDecodeSingletonFirst() {
+
+    List<String> encoded = Arrays.asList(B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Start Other
+   */
+  @Test
+  public void testDecodeAdjacentSingletonFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE), new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Other
+   */
+  @Test
+  public void testDecodePairFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Continue Other
+   */
+  @Test
+  public void testDecodeTripletFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Start Other
+   */
+  @Test
+  public void testDecodeAdjacentPairSingleton() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE), new Span(2, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other Start Other
+   */
+  @Test
+  public void testDecodeOtherFirst() {
+    List<String> encoded = Arrays.asList(OTHER, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, A-Start, A-Continue, A-Continue, Other, B-Start, B-Continue, Other, C-Start, Other
+   */
+  @Test
+  public void testDecodeMultiClass() {
+    List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_CONTINUE,
+        OTHER, B_START, B_CONTINUE, OTHER, C_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
+        new Span(5, 7, B_TYPE), new Span(8, 9, C_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testCompatibilityEmpty() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  @Test
+  public void testCompatibilitySingleStart() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START}));
+  }
+
+  @Test
+  public void testCompatibilitySingleContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE}));
+  }
+
+  @Test
+  public void testCompatibilitySingleOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityStartContinue() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE}));
+  }
+
+  @Test
+  public void testCompatibilityStartOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityStartContinueOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, OTHER}));
+  }
+
+
+  @Test
+  public void testCompatibilityMultiClass() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_CONTINUE, B_START, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityBadTag() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, "BAD"}));
+  }
+
+  @Test
+  public void testCompatibilityRepeated() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_START, A_CONTINUE, A_CONTINUE, B_START, B_START, OTHER, OTHER}));
+  }
+
+}
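
For context, here is a minimal sketch of the encode/decode round trip that BioCodecTest exercises. It assumes only the API surface visible in the test above (NameSample, BioCodec.encode, BioCodec.decode); the sentence, the span, and the "person" type name are illustrative values, not taken from the commit.

import java.util.Arrays;
import java.util.List;

import opennlp.tools.namefind.BioCodec;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.Span;

public class BioCodecRoundTripSketch {

  public static void main(String[] args) {
    BioCodec codec = new BioCodec();

    // One name span covering tokens 2..3 ("Stefanie Schmidt"); the type
    // string "person" is an illustrative value.
    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
    Span[] names = new Span[] { new Span(2, 4, "person") };
    NameSample sample = new NameSample(sentence, names, true);

    // encode produces one outcome per token: OTHER for tokens outside a
    // name, "<type>-" + START for the first token of a name and
    // "<type>-" + CONTINUE for the remaining tokens, as the test shows.
    String[] outcomes = codec.encode(sample.getNames(), sentence.length);
    System.out.println(Arrays.toString(outcomes));

    // decode merges a start outcome with the continue outcomes that follow
    // it into a single Span, so the original span is recovered.
    List<String> outcomeList = Arrays.asList(outcomes);
    Span[] decoded = codec.decode(outcomeList);
    System.out.println(Arrays.toString(decoded));
  }
}

As the decode test cases above illustrate, every start outcome opens a new span, which is then extended over the continue outcomes of the same type that immediately follow it.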