You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/17 20:40:20 UTC
svn commit: r1301983 - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/namefind/DictionaryNameFinder.java
test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
Author: colen
Date: Sat Mar 17 19:40:20 2012
New Revision: 1301983
URL: http://svn.apache.org/viewvc?rev=1301983&view=rev
Log:
OPENNLP-477: DictionaryNameFinder now generates spans with a default type.
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java (with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java?rev=1301983&r1=1301982&r2=1301983&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java Sat Mar 17 19:40:20 2012
@@ -33,6 +33,8 @@ public class DictionaryNameFinder implem
private Dictionary mDictionary;
+ private static final String DEFAULT_TYPE = "default";
+
/**
* Initializes the current instance.
*
@@ -62,7 +64,7 @@ public class DictionaryNameFinder implem
StringList tokenList = new StringList(tokens);
if (mDictionary.contains(tokenList)) {
- foundName = new Span(startToken, endToken + 1);
+ foundName = new Span(startToken, endToken + 1, DEFAULT_TYPE);
}
}
}
Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java?rev=1301983&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java Sat Mar 17 19:40:20 2012
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.StringList;
+import opennlp.tools.util.eval.FMeasure;
+
+import org.junit.Test;
+
+/**
+ * Tests evaluating a {@link DictionaryNameFinder} with a {@link TokenNameFinderEvaluator}.
+ */
+public class DictionaryNameFinderEvaluatorTest {
+
+ @Test
+ public void testEvaluator() throws IOException, URISyntaxException {
+ DictionaryNameFinder nameFinder = new DictionaryNameFinder(
+ createDictionary());
+ TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
+ nameFinder);
+ ObjectStream<NameSample> sample = createSample();
+
+ evaluator.evaluate(sample);
+ sample.close(); // NOTE(review): not reached if evaluate() throws; there is no try/finally
+ FMeasure fmeasure = evaluator.getFMeasure();
+
+ // TODO: the dictionary is built from the same corpus, so why isn't the F-measure == 1?
+ assertTrue(fmeasure.getFMeasure() > 0); // weak check: passes for any non-zero score
+ }
+
+ /**
+ * Opens the AnnotatedSentences.txt classpath resource as a stream of {@link NameSample}s.
+ *
+ * @return a new sample stream over the corpus, read as ISO-8859-1; callers here close it
+ * @throws IOException if the corpus file cannot be opened
+ * @throws URISyntaxException if the resource URL cannot be converted to a URI
+ */
+ private static ObjectStream<NameSample> createSample() throws IOException,
+ URISyntaxException {
+ FileInputStream sampleDataIn = new FileInputStream(new File(
+ DictionaryNameFinderEvaluatorTest.class.getClassLoader()
+ .getResource("opennlp/tools/namefind/AnnotatedSentences.txt")
+ .toURI())); // NOTE(review): getResource returns null if the file is absent -> NPE here
+
+ return new NameSampleDataStream(new PlainTextByLineStream(
+ sampleDataIn.getChannel(), "ISO-8859-1"));
+ }
+
+ /**
+ * Builds a dictionary with an entry for every name span annotated in the sample data.
+ *
+ * @return a dictionary holding each annotated name as a {@link StringList}
+ * @throws IOException if the sample corpus cannot be opened or read
+ * @throws URISyntaxException if the corpus resource URL cannot be converted to a URI
+ */
+ private static Dictionary createDictionary() throws IOException,
+ URISyntaxException {
+ ObjectStream<NameSample> sampleStream = createSample();
+ NameSample sample = sampleStream.read();
+ List<String[]> entries = new ArrayList<String[]>(); // one String[] per annotated name
+ while (sample != null) { // read() returning null ends the loop
+ Span[] names = sample.getNames();
+ if (names != null && names.length > 0) {
+ String[] toks = sample.getSentence();
+ for (Span name : names) {
+ Span[] n = { name }; // wrap the single span: spansToStrings takes an array
+ String[] nameToks = Span.spansToStrings(n, toks); // NOTE(review): presumably one joined string per span, not one per token - confirm; may explain the TODO in testEvaluator
+ entries.add(nameToks);
+ }
+ }
+ sample = sampleStream.read();
+ }
+ sampleStream.close(); // NOTE(review): skipped if read() throws; there is no try/finally
+ Dictionary dictionary = new Dictionary(true); // NOTE(review): true presumably selects case-sensitive matching - confirm
+ for (String[] entry : entries) {
+ StringList dicEntry = new StringList(entry);
+ dictionary.put(dicEntry);
+ }
+ return dictionary;
+ }
+}
Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain