You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/01/26 00:43:11 UTC
svn commit: r1063526 -
/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
Author: colen
Date: Tue Jan 25 23:43:10 2011
New Revision: 1063526
URL: http://svn.apache.org/viewvc?rev=1063526&view=rev
Log:
OPENNLP-89 Created unit tests for ChunkerME
Added:
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java (with props)
Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java?rev=1063526&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java Tue Jan 25 23:43:10 2011
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.List;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Sequence;
+import opennlp.tools.util.Span;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This is the test class for {@link ChunkerME}.
+ * <p>
+ * Proper testing and evaluation of the chunker is only possible with a
+ * large corpus which contains a huge number of test sentences.
+ * <p>
+ * The scope of this test is to make sure that the chunker code can be
+ * executed. This test cannot detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the tagging performance of the
+ * chunker.
+ * <p>
+ * In this test the {@link ChunkerME} is trained with a small number of
+ * training sentences and then the computed model is used to predict sentences
+ * from the training sentences.
+ */
+public class ChunkerMETest {
+  /** Chunker under test; re-trained from the test corpus before each test. */
+  private Chunker chunker;
+
+  /** Tokens of the sample sentence passed to the chunker. */
+  String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for",
+      "it", "to", "supply", "200", "additional", "so-called", "shipsets",
+      "for", "the", "planes", "." };
+
+  /** Part-of-speech tags, one per entry of {@link #toks1}. */
+  String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
+      "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
+
+  /** Chunk tags expected for the sample sentence, one per token. */
+  String[] expect1 = { "B-NP", "B-VP", "B-NP", "I-NP", "B-VP", "B-SBAR",
+      "B-NP", "B-VP", "I-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-PP", "B-NP",
+      "I-NP", "O" };
+
+  /** Trains a chunker model from the bundled test corpus before each test. */
+  @Before
+  public void startup() throws IOException {
+    InputStream in = getClass().getClassLoader().getResourceAsStream(
+        "opennlp/tools/chunker/test.txt");
+    assertNotNull("opennlp/tools/chunker/test.txt not on classpath", in);
+    try {
+      ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream(
+          new PlainTextByLineStream(new InputStreamReader(in, "UTF-8")));
+      ChunkerModel chunkerModel = ChunkerME.train("en", sampleStream, 1, 70);
+      this.chunker = new ChunkerME(chunkerModel);
+    } finally {
+      in.close(); // the original never closed the resource stream
+    }
+  }
+
+  /** The array-based chunk method must return exactly the expected tags. */
+  @Test
+  public void testChunkAsArray() throws Exception {
+    String[] preds = chunker.chunk(toks1, tags1);
+    assertArrayEquals(expect1, preds);
+  }
+
+  /** chunkAsSpans must return the ten typed spans listed below, in order. */
+  @Test
+  public void testChunkAsSpan() throws Exception {
+    Span[] preds = chunker.chunkAsSpans(toks1, tags1);
+
+    assertEquals(10, preds.length);
+    assertEquals(new Span(0, 1, "NP"), preds[0]);
+    assertEquals(new Span(1, 2, "VP"), preds[1]);
+    assertEquals(new Span(2, 4, "NP"), preds[2]);
+    assertEquals(new Span(4, 5, "VP"), preds[3]);
+    assertEquals(new Span(5, 6, "SBAR"), preds[4]);
+    assertEquals(new Span(6, 7, "NP"), preds[5]);
+    assertEquals(new Span(7, 9, "VP"), preds[6]);
+    assertEquals(new Span(9, 13, "NP"), preds[7]);
+    assertEquals(new Span(13, 14, "PP"), preds[8]);
+    assertEquals(new Span(14, 16, "NP"), preds[9]);
+  }
+
+  /** The list-based chunk method (deprecated) must return the expected tags. */
+  @Test
+  public void testChunkAsList() throws Exception {
+    @SuppressWarnings("deprecation")
+    List<String> preds = chunker.chunk(Arrays.asList(toks1),
+        Arrays.asList(tags1));
+
+    assertEquals(Arrays.asList(expect1), preds);
+  }
+
+  /** List-based topKSequences: best result is expected, runner-up differs. */
+  @Test
+  public void testTokenProb() throws Exception {
+    Sequence[] preds = chunker.topKSequences(Arrays.asList(toks1),
+        Arrays.asList(tags1));
+
+    assertTrue(preds.length > 1); // preds[1] is read below
+    assertEquals(expect1.length, preds[0].getProbs().length);
+    assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
+    // equals, not assertNotSame: identity against a fresh list always passed
+    assertFalse(Arrays.asList(expect1).equals(preds[1].getOutcomes()));
+  }
+
+  /** Array-based topKSequences called with -5.55 must return four sequences. */
+  @Test
+  public void testTokenProbArray() throws Exception {
+    Sequence[] preds = chunker.topKSequences(toks1, tags1, -5.55);
+
+    assertEquals(4, preds.length);
+    assertEquals(expect1.length, preds[0].getProbs().length);
+    assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
+    // equals, not assertNotSame: identity against a fresh list always passed
+    assertFalse(Arrays.asList(expect1).equals(preds[1].getOutcomes()));
+  }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain