You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/17 20:40:20 UTC

svn commit: r1301983 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/DictionaryNameFinder.java test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java

Author: colen
Date: Sat Mar 17 19:40:20 2012
New Revision: 1301983

URL: http://svn.apache.org/viewvc?rev=1301983&view=rev
Log:
OPENNLP-477: DictionaryNameFinder now generates spans with a default type.

Added:
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java   (with props)
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java?rev=1301983&r1=1301982&r2=1301983&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java Sat Mar 17 19:40:20 2012
@@ -33,6 +33,8 @@ public class DictionaryNameFinder implem
 
   private Dictionary mDictionary;
 
+  private static final String DEFAULT_TYPE = "default"; 
+
   /**
    * Initializes the current instance.
    *
@@ -62,7 +64,7 @@ public class DictionaryNameFinder implem
           StringList tokenList = new StringList(tokens);
 
           if (mDictionary.contains(tokenList)) {
-            foundName = new Span(startToken, endToken + 1);
+            foundName = new Span(startToken, endToken + 1, DEFAULT_TYPE);
           }
         }
       }

Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java?rev=1301983&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java Sat Mar 17 19:40:20 2012
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.StringList;
+import opennlp.tools.util.eval.FMeasure;
+
+import org.junit.Test;
+
+/**
+ * Tests the evaluation of a {@link DictionaryNameFinder} by a {@link TokenNameFinderEvaluator}.
+ */
+public class DictionaryNameFinderEvaluatorTest {
+
+  @Test
+  public void testEvaluator() throws IOException, URISyntaxException {
+    DictionaryNameFinder nameFinder = new DictionaryNameFinder(
+        createDictionary());
+    TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
+        nameFinder);
+    ObjectStream<NameSample> sample = createSample();
+
+    evaluator.evaluate(sample);
+    sample.close();
+    FMeasure fmeasure = evaluator.getFMeasure();
+
+    // TODO: the dictionary is built from the names in this same corpus, so why isn't the F-measure == 1?
+    assertTrue(fmeasure.getFMeasure() > 0);
+  }
+
+  /**
+   * Creates a {@link NameSample} stream over the annotated corpus resource
+   * {@code opennlp/tools/namefind/AnnotatedSentences.txt}, read as ISO-8859-1.
+   * @return a newly opened stream of name samples; it is not closed here, callers close it
+   * @throws IOException if the corpus file cannot be opened
+   * @throws URISyntaxException if the resource URL cannot be converted to a URI
+   */
+  private static ObjectStream<NameSample> createSample() throws IOException,
+      URISyntaxException {
+    FileInputStream sampleDataIn = new FileInputStream(new File(
+        DictionaryNameFinderEvaluatorTest.class.getClassLoader()
+            .getResource("opennlp/tools/namefind/AnnotatedSentences.txt")
+            .toURI()));
+
+    return new NameSampleDataStream(new PlainTextByLineStream(
+        sampleDataIn.getChannel(), "ISO-8859-1"));
+  }
+
+  /**
+   * Creates a dictionary with all names from the sample data: the tokens covered
+   * by each annotated name span are collected first and then added as entries.
+   * @return a dictionary with a {@link StringList} entry for each collected name
+   * @throws IOException if the sample data cannot be opened, read or closed
+   * @throws URISyntaxException if the sample resource URL cannot be converted to a URI
+   */
+  private static Dictionary createDictionary() throws IOException,
+      URISyntaxException {
+    ObjectStream<NameSample> sampleStream = createSample();
+    NameSample sample = sampleStream.read();
+    List<String[]> entries = new ArrayList<String[]>();
+    while (sample != null) {
+      Span[] names = sample.getNames();
+      if (names != null && names.length > 0) {
+        String[] toks = sample.getSentence();
+        for (Span name : names) {
+          Span[] n = { name }; // spansToStrings takes an array, so wrap the single span
+          String[] nameToks = Span.spansToStrings(n, toks);
+          entries.add(nameToks);
+        }
+      }
+      sample = sampleStream.read();
+    }
+    sampleStream.close();
+    Dictionary dictionary = new Dictionary(true);
+    for (String[] entry : entries) {
+      StringList dicEntry = new StringList(entry);
+      dictionary.put(dicEntry);
+    }
+    return dictionary;
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain