You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by sm...@apache.org on 2017/04/19 00:39:53 UTC
opennlp git commit: OPENNLP-1030: Add unit test for TokenNameFinderTool
Repository: opennlp
Updated Branches:
refs/heads/master d8cdd5eef -> e2cf4811b
OPENNLP-1030: Add unit test for TokenNameFinderTool
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e2cf4811
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e2cf4811
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e2cf4811
Branch: refs/heads/master
Commit: e2cf4811ba485b0bb29d7d80bc853666bfbfa958
Parents: d8cdd5e
Author: jzonthemtn <je...@mtnfog.com>
Authored: Tue Apr 18 20:39:07 2017 -0400
Committer: smarthi <sm...@apache.org>
Committed: Tue Apr 18 20:39:07 2017 -0400
----------------------------------------------------------------------
.../cmdline/namefind/TokenNameFinderTool.java | 1 +
.../tools/cmdline/TokenNameFinderToolTest.java | 137 +++++++++++++++++++
2 files changed, 138 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
index 59b2f3a..a5c9bd6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
@@ -47,6 +47,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool {
return "Usage: " + CLI.CMD + " " + getName() + " model1 model2 ... modelN < sentences";
}
+ @Override
public void run(String[] args) {
if (args.length == 0) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
new file mode 100644
index 0000000..3ade0d5
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline;
+
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.namefind.TokenNameFinderFactory;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.MockInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * Tests for {@link TokenNameFinderTool} that drive the tool through its
+ * {@code run(String[])} entry point and inspect what it prints to standard output.
+ *
+ * <p>The tests temporarily replace {@code System.in} / {@code System.out}; each test
+ * restores the original streams in a {@code finally} block so that other tests running
+ * in the same JVM are not affected.
+ */
+public class TokenNameFinderToolTest {
+
+  /**
+   * Trains a model, passes its path as the only argument, feeds two sentences to the tool
+   * through standard input and checks that the captured output contains the first sentence
+   * with "Stefanie Schmidt." tagged as a person.
+   *
+   * @throws IOException if training or writing the model file fails
+   */
+  @Test
+  public void run() throws IOException {
+
+    File model1 = trainModel();
+
+    String[] args = new String[]{model1.getAbsolutePath()};
+
+    final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
+    InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = newUtf8PrintStream(baos);
+
+    // Remember the real streams so they can be put back even if the tool throws.
+    final InputStream originalIn = System.in;
+    final PrintStream originalOut = System.out;
+
+    try {
+      System.setIn(stream);
+      System.setOut(ps);
+
+      TokenNameFinderTool tool = new TokenNameFinderTool();
+      tool.run(args);
+    } finally {
+      System.setIn(originalIn);
+      System.setOut(originalOut);
+    }
+
+    ps.flush();
+
+    final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+    Assert.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
+
+  }
+
+  /**
+   * Expects a {@link TerminateToolException} when the tool is run with
+   * {@code "invalidmodel.bin"} as its only argument.
+   */
+  @Test(expected = TerminateToolException.class)
+  public void invalidModel() {
+
+    // NOTE(review): presumably no file with this name exists in the working directory.
+    String[] args = new String[]{"invalidmodel.bin"};
+
+    TokenNameFinderTool tool = new TokenNameFinderTool();
+    tool.run(args);
+
+  }
+
+  /**
+   * Runs the tool without arguments and checks that the trimmed text written to standard
+   * output equals {@code tool.getHelp()}.
+   */
+  @Test
+  public void usage() {
+
+    String[] args = new String[]{};
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = newUtf8PrintStream(baos);
+
+    TokenNameFinderTool tool = new TokenNameFinderTool();
+
+    // Remember the real stream so it can be put back even if the tool throws.
+    final PrintStream originalOut = System.out;
+
+    try {
+      System.setOut(ps);
+      tool.run(args);
+    } finally {
+      System.setOut(originalOut);
+    }
+
+    ps.flush();
+
+    final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+    Assert.assertEquals(tool.getHelp(), content.trim());
+
+  }
+
+  /**
+   * Creates a {@link PrintStream} that encodes text as UTF-8 into the given buffer, so the
+   * captured bytes can be decoded with the same charset regardless of the platform default.
+   *
+   * @param sink the buffer that receives the encoded output
+   * @return an auto-flushing UTF-8 print stream writing to {@code sink}
+   */
+  private static PrintStream newUtf8PrintStream(ByteArrayOutputStream sink) {
+    try {
+      return new PrintStream(sink, true, StandardCharsets.UTF_8.name());
+    } catch (java.io.UnsupportedEncodingException e) {
+      // UTF-8 is one of the charsets every Java platform is required to support.
+      throw new IllegalStateException("UTF-8 charset is not available", e);
+    }
+  }
+
+  /**
+   * Trains a name finder model from {@code opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt}
+   * (read as ISO-8859-1, 70 iterations, cutoff 1) and serializes it to a temporary file.
+   *
+   * <p>The temporary file is registered for deletion when the JVM exits.
+   *
+   * @return the file containing the serialized model
+   * @throws IOException if reading the training data or writing the model fails
+   */
+  private File trainModel() throws IOException {
+
+    String encoding = "ISO-8859-1";
+
+    ObjectStream<String> lineStream =
+        new PlainTextByLineStream(new MockInputStreamFactory(
+        new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")), encoding);
+    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
+
+    TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
+    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));
+
+    TokenNameFinderModel model;
+
+    TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
+
+    try {
+      model = NameFinderME.train("en", null, sampleStream, params,
+          nameFinderFactory);
+    }
+    finally {
+      sampleStream.close();
+    }
+
+    File modelFile = File.createTempFile("model", ".bin");
+    // Do not leave trained models behind in the temp directory after the test run.
+    modelFile.deleteOnExit();
+
+    try (BufferedOutputStream modelOut =
+        new BufferedOutputStream(new FileOutputStream(modelFile))) {
+      model.serialize(modelOut);
+    }
+
+    return modelFile;
+  }
+
+}