You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by sm...@apache.org on 2017/04/19 00:39:53 UTC
opennlp git commit: OPENNLP-1030: Add unit test for TokenNameFinderTool

Repository: opennlp
Updated Branches:
  refs/heads/master d8cdd5eef -> e2cf4811b


OPENNLP-1030: Add unit test for TokenNameFinderTool


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e2cf4811
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e2cf4811
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e2cf4811

Branch: refs/heads/master
Commit: e2cf4811ba485b0bb29d7d80bc853666bfbfa958
Parents: d8cdd5e
Author: jzonthemtn <je...@mtnfog.com>
Authored: Tue Apr 18 20:39:07 2017 -0400
Committer: smarthi <sm...@apache.org>
Committed: Tue Apr 18 20:39:07 2017 -0400

----------------------------------------------------------------------
 .../cmdline/namefind/TokenNameFinderTool.java   |   1 +
 .../tools/cmdline/TokenNameFinderToolTest.java  | 137 +++++++++++++++++++
 2 files changed, 138 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 59b2f3a..a5c9bd6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -47,6 +47,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
     return "Usage: " + CLI.CMD + " " + getName() + " model1 model2 ... modelN < sentences";
   }
 
+  @Override
   public void run(String[] args) {
 
     if (args.length == 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
new file mode 100644
index 0000000..3ade0d5
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline;
+
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.namefind.TokenNameFinderFactory;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.MockInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class TokenNameFinderToolTest {
+
+  @Test
+  public void run() throws IOException {
+
+    File model1 = trainModel();
+
+    String[] args = new String[]{model1.getAbsolutePath()};
+    
+    final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
+    InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
+    
+    System.setIn(stream);
+    
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(baos);
+    System.setOut(ps);
+
+    TokenNameFinderTool tool = new TokenNameFinderTool();
+    tool.run(args);
+    
+    final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+    Assert.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
+    
+  }
+  
+  @Test(expected = TerminateToolException.class)
+  public void invalidModel() {
+
+    String[] args = new String[]{"invalidmodel.bin"};
+
+    TokenNameFinderTool tool = new TokenNameFinderTool();
+    tool.run(args);
+
+  }
+  
+  @Test()
+  public void usage() {
+
+    String[] args = new String[]{};
+    
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(baos);
+    System.setOut(ps);
+
+    TokenNameFinderTool tool = new TokenNameFinderTool();
+    tool.run(args);
+
+    final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+    Assert.assertEquals(tool.getHelp(), content.trim());
+    
+  }
+  
+  private File trainModel() throws IOException {
+    
+    String encoding = "ISO-8859-1";
+
+    ObjectStream<String> lineStream =
+        new PlainTextByLineStream(new MockInputStreamFactory(
+            new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")), encoding);
+    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
+
+    TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
+    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
+    
+    TokenNameFinderModel model;
+
+    TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
+
+    try {
+      model = NameFinderME.train("en", null, sampleStream, params,
+          nameFinderFactory);
+    }
+    finally {
+      sampleStream.close();
+    }
+
+    BufferedOutputStream modelOut = null;
+    
+    File modelFile = File.createTempFile("model", ".bin");
+    
+    try {
+      modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
+      model.serialize(modelOut);
+    } finally {
+      if (modelOut != null) 
+       modelOut.close();    
+    }
+    
+    return modelFile;
+  }
+  
+}