You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@opennlp.apache.org by Jörn Kottmann <ko...@gmail.com> on 2013/10/21 14:16:11 UTC
Re: svn commit: r1533883 - in /opennlp/sandbox/modelbuilder-prototype:
./ src/ src/main/ src/main/java/ src/main/java/modelbuilder/ src/main/java/opennlp/
src/main/java/opennlp/modelbuilder/ src/main/java/opennlp/modelbuilder/v2/
src/main/java/opennlp/mode...
Hello,
All files that are checked in should have the Apache License (AL) header.
Can you please add the header to these files?
Have a look at the source files in opennlp-tools and just copy their
headers over.
Some IDEs allow you to configure the header; it will then be inserted
automatically
when a new class is created.
HTH,
Jörn
On 10/20/2013 03:00 PM, markg@apache.org wrote:
> Author: markg
> Date: Sun Oct 20 13:00:17 2013
> New Revision: 1533883
>
> URL: http://svn.apache.org/r1533883
> Log:
> Prototype of a tool to allow users to create models from a set of known entities based on their own data in the form of sentences.
> See the Example class in the .v2 package.
>
> Added:
> opennlp/sandbox/modelbuilder-prototype/pom.xml
> opennlp/sandbox/modelbuilder-prototype/src/
> opennlp/sandbox/modelbuilder-prototype/src/main/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java
> opennlp/sandbox/modelbuilder-prototype/src/main/resources/
> opennlp/sandbox/modelbuilder-prototype/src/test/
> opennlp/sandbox/modelbuilder-prototype/src/test/java/
> opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/
> opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java
>
> Added: opennlp/sandbox/modelbuilder-prototype/pom.xml
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/pom.xml?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/pom.xml (added)
> +++ opennlp/sandbox/modelbuilder-prototype/pom.xml Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,30 @@
> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
> + <modelVersion>4.0.0</modelVersion>
> +
> + <groupId>modelbuilder</groupId>
> + <artifactId>modelbuilder-prototype</artifactId>
> + <version>1.0-SNAPSHOT</version>
> + <packaging>jar</packaging>
> +
> + <name>modelbuilder-prototype</name>
> + <url>http://maven.apache.org</url>
> +
> + <properties>
> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
> + </properties>
> +
> + <dependencies>
> + <dependency>
> + <groupId>junit</groupId>
> + <artifactId>junit</artifactId>
> + <version>3.8.1</version>
> + <scope>test</scope>
> + </dependency>
> + <dependency>
> + <groupId>org.apache.opennlp</groupId>
> + <artifactId>opennlp-tools</artifactId>
> + <version>1.6.0-SNAPSHOT</version>
> + </dependency>
> + </dependencies>
> +</project>
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,13 @@
> +package modelbuilder;
> +
> +/**
> + * Hello world!
> + *
> + */
> +public class App
> +{
> + public static void main( String[] args )
> + {
> +
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,67 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
> +import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
> +import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
> +import opennlp.modelbuilder.v2.impls.ModelableImpl;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class Example {
> +
> + public static void main(String[] args) {
> +
> + GenericModelGenerator modelGenerator = new GenericModelGenerator();
> + //every component has a map as a place to recieve params
> + //these are required for the current file-based impls
> + Map<String, String> params = new HashMap<String, String>();
> + params.put("sentencesfile", "/the/file");
> + params.put("knownentityfile", "/the/file");
> + params.put("knownentitytype", "person");
> + params.put("blacklistfile", "/the/file");
> + params.put("modelablepath", "/the/file");
> +
> + /**
> + * sentence providers feed this process with user data derived sentences
> + * this impl just reads line by line through a file
> + */
> + SentenceProvider sentenceProvider = new FileSentenceProvider();
> + sentenceProvider.setParameters(params);
> + /**
> + *KnownEntityProviders provide a seed list of known entities... such as Barack Obama for person, or Germany for location
> + * obviously these would want to be prolific, non ambiguous names
> + */
> + KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
> + knownEntityProvider.setParameters(params);
> + /**
> + * ModelGenerationValidators try to weed out bad hits by the iterations of the name finder.
> + * Since this is a recursive process, with each iteration the namefinder will get more and more greedy if bad entities are allowed in
> + * this provides a mechanism for throwing out obviously bad hits.
> + * A good impl may be to make sure a location is actually within a noun phrase etc...users can make this as specific as they need for their dat
> + * and their use case
> + */
> + ModelGenerationValidator validator = new FileModelValidatorImpl();
> + validator.setParameters(params);
> + /**
> + * Modelable's write and read the annotated sentences, as well as create and write the NER models
> + */
> +
> + Modelable modelable = new ModelableImpl();
> + modelable.setParameters(params);
> +
> + /**
> + * the modelGenerator actually runs the process with a set number of iterations... could be better by actually calculating the
> + * diff between runs and stopping based on a thresh, but for extrememly large sentence sets this may be too much.
> + */
> + modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, 3);
> +
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,70 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +import opennlp.tools.namefind.NameFinderME;
> +import opennlp.tools.util.Span;
> +
> +/**
> + *
> + *
> + */
> +public class GenericModelGenerator implements SemiSupervisedModelGenerator{
> + private Map<String, String> params = new HashMap<String, String>();
> +
> + @Override
> + public void setParameters(Map<String, String> params) {
> + this.params = params;
> + }
> + @Override
> + public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
> + ModelGenerationValidator validator, Modelable modelable, int iterations) {
> + for (int iteration = 0; iteration < iterations; iteration++) {
> + System.out.println("ITERATION: " + iteration);
> + System.out.println("\tPerfoming Known Entity Annotation");
> + System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
> + System.out.println("\t\treading data....: ");
> + for (String sentence : sentenceProvider.getSentences()) {
> + for (String knownEntity : knownEntityProvider.getKnownEntities()) {
> + if (sentence.contains(knownEntity)) {
> + //if the same sentence has multiple hits should they be annotated separately?
> + modelable.addAnnotatedSentence(modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
> + }
> + }
> + }
> + System.out.println("\t\twriting annotated sentences....: ");
> + modelable.writeAnnotatedSentences();
> + modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
> + NameFinderME nf = new NameFinderME(modelable.getModel());
> + System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
> + System.out.println("\tPerforming NER");
> + for (String sentence : sentenceProvider.getSentences()) {
> + if (!validator.validSentence(sentence)) {
> + continue;
> + }
> + String[] tokens = modelable.tokenizeSentenceToWords(sentence);
> +
> + Span[] find = nf.find(tokens);
> + nf.clearAdaptiveData();
> +
> + String[] namedEntities = Span.spansToStrings(find, tokens);
> +
> + for (String namedEntity : namedEntities) {
> + if (validator.validNamedEntity(namedEntity)) {
> + knownEntityProvider.addKnownEntity(namedEntity);
> + modelable.addAnnotatedSentence(modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
> +
> + }
> + }
> + }
> + System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
> + System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
> + }
> + modelable.writeAnnotatedSentences();
> + modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,35 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.List;
> +import java.util.Set;
> +
> +
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface KnownEntityProvider extends ModelParameter{
> + /**
> + * returns a list of known non ambiguous entities.
> + * @return a set of entities
> + */
> + Set<String> getKnownEntities();
> +/**
> + * adds to the set of known entities. Overriding classes should hold this list in a class level set.
> + * @param unambiguousEntity
> + */
> + void addKnownEntity(String unambiguousEntity);
> +/**
> + * defines the type of entity that the set contains, ie person, location, organization.
> + * @return
> + */
> + String getKnownEntitiesType();
> +
> +
> +
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,23 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.Collection;
> +import java.util.Set;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface ModelGenerationValidator extends ModelParameter {
> +
> + Boolean validSentence(String sentence);
> +
> + Boolean validNamedEntity(String namedEntity);
> +
> +
> +
> + Collection<String> getBlackList();
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,17 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface ModelParameter {
> +
> + void setParameters(Map<String, String> params);
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,37 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.List;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.tools.namefind.TokenNameFinderModel;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface Modelable extends ModelParameter{
> +
> +
> +
> + String annotate(String sentence, String namedEntity, String entityType);
> +
> + void writeAnnotatedSentences();
> +
> + Set<String> getAnnotatedSentences();
> +
> + void setAnnotatedSentences(Set<String> annotatedSentences);
> +
> + void addAnnotatedSentence(String annotatedSentence);
> +
> + void buildModel( String entityType);
> +
> + TokenNameFinderModel getModel();
> +
> + String[] tokenizeSentenceToWords(String sentence);
> +
> +
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,15 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface SemiSupervisedModelGenerator extends ModelParameter {
> +
> + void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
> + ModelGenerationValidator validator, Modelable modelable, int iterations);
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,16 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.Set;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public interface SentenceProvider extends ModelParameter {
> +
> + Set<String> getSentences();
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,74 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class FileKnownEntityProvider implements KnownEntityProvider {
> + private Map<String, String> params = new HashMap<String, String>();
> + Set<String> knownEntities = new HashSet<String>();
> +
> + @Override
> + public Set<String> getKnownEntities() {
> + if (knownEntities.isEmpty()) {
> + try {
> + InputStream fis;
> + BufferedReader br;
> + String line;
> +
> + fis = new FileInputStream(params.get("knownentityfile"));
> + br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> + while ((line = br.readLine()) != null) {
> + knownEntities.add(line);
> + }
> +
> + // Done with the file
> + br.close();
> + br = null;
> + fis = null;
> + } catch (FileNotFoundException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + } catch (IOException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + }
> + }
> + return knownEntities;
> + }
> +
> + @Override
> + public void addKnownEntity(String unambiguousEntity) {
> + knownEntities.add(unambiguousEntity);
> + }
> +
> + @Override
> + public String getKnownEntitiesType() {
> +
> + return params.get("knownentitytype");
> + }
> +
> +
> +
> + @Override
> + public void setParameters(Map<String, String> params) {
> + this.params = params;
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,88 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.Collection;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import java.util.regex.Pattern;
> +import opennlp.modelbuilder.v2.ModelGenerationValidator;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class FileModelValidatorImpl implements ModelGenerationValidator {
> +
> + private Set<String> badentities = new HashSet<String>();
> + private final double MIN_SCORE_FOR_TRAINING = 0.95d;
> + private Object validationData;
> + private Map<String, String> params = new HashMap<String, String>();
> +
> + @Override
> + public void setParameters(Map<String, String> params) {
> + this.params = params;
> + }
> +
> + @Override
> + public Boolean validSentence(String sentence) {
> + //returning true by default, because the sentence provider will return only "valid" sentences in this case
> + return true;
> + }
> +
> + @Override
> + public Boolean validNamedEntity(String namedEntity) {
> +
> + if (badentities.isEmpty()) {
> + getBlackList();
> + }
> +
> + Pattern p = Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
> + if (p.matcher(namedEntity).find()) {
> + return false;
> + }
> + Boolean b = true;
> + if (badentities.contains(namedEntity.toLowerCase())) {
> + b = false;
> + }
> + return b;
> + }
> +
> + @Override
> + public Collection<String> getBlackList() {
> + if (!badentities.isEmpty()) {
> + try {
> + InputStream fis;
> + BufferedReader br;
> + String line;
> +
> + fis = new FileInputStream(params.get("blacklistfile"));
> + br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> + while ((line = br.readLine()) != null) {
> + badentities.add(line);
> + }
> + br.close();
> + br = null;
> + fis = null;
> + } catch (FileNotFoundException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + } catch (IOException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + }
> + }
> + return badentities;
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,60 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.SentenceProvider;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class FileSentenceProvider implements SentenceProvider {
> +
> + private Map<String, String> params = new HashMap<String, String>();
> + Set<String> sentences = new HashSet<String>();
> +
> + public Set<String> getSentences() {
> + if (sentences.isEmpty()) {
> + try {
> + InputStream fis;
> + BufferedReader br;
> + String line;
> +
> + fis = new FileInputStream(params.get("sentencesfile"));
> + br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> + while ((line = br.readLine()) != null) {
> + sentences.add(line);
> + }
> +
> + // Done with the file
> + br.close();
> + br = null;
> + fis = null;
> + } catch (FileNotFoundException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + } catch (IOException ex) {
> + Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> + }
> + }
> + return sentences;
> + }
> +
> + public void setParameters(Map<String, String> params) {
> + this.params = params;
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,93 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.sql.CallableStatement;
> +import java.sql.Connection;
> +import java.sql.DriverManager;
> +import java.sql.ResultSet;
> +import java.sql.SQLException;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class LocationKnownEntityProviderImpl implements KnownEntityProvider {
> +
> + Set<String> ret = new HashSet<String>();
> +
> + @Override
> + public Set<String> getKnownEntities() {
> + if (ret.isEmpty()) {
> + try {
> + getData();
> + } catch (Exception ex) {
> + Logger.getLogger(LocationKnownEntityProviderImpl.class.getName()).log(Level.SEVERE, null, ex);
> + }
> +
> + }
> + return ret;
> + }
> + private Set<String> getData() throws Exception {
> +
> + Connection con = getMySqlConnection();
> + if (con.isClosed()) {
> + con = getMySqlConnection();
> + }
> + CallableStatement cs;
> + cs = con.prepareCall("CALL getcountrylist()");
> +
> + ResultSet rs;
> + try {
> + rs = cs.executeQuery();
> + while (rs.next()) {
> + ret.add(rs.getString("full_name_nd_ro"));
> + }
> +
> + } catch (SQLException ex) {
> + throw ex;
> + } catch (Exception e) {
> + System.err.println(e);
> + } finally {
> + con.close();
> + }
> +
> + return ret;
> + }
> + private static Connection getMySqlConnection() throws Exception {
> + // EntityLinkerProperties property = new EntityLinkerProperties(new File("c:\\temp\\opennlpmodels\\entitylinker.properties"));
> + String driver = "org.gjt.mm.mysql.Driver";
> + String url = "jdbc:mysql://127.0.0.1:3306/world";
> + String username = "root";
> + String password = "559447";
> +
> + Class.forName(driver);
> + Connection conn = DriverManager.getConnection(url, username, password);
> + return conn;
> + }
> + @Override
> + public String getKnownEntitiesType() {
> + return "location";
> + }
> +
> + @Override
> + public void addKnownEntity(String unambiguousEntity) {
> + ret.add(unambiguousEntity);
> + }
> +
> + private Map<String, String> params = new HashMap<String, String>();
> +
> + @Override
> + public void setParameters(Map<String, String> params) {
> + this.params = params;
> + }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,131 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.regex.Pattern;
> +import opennlp.modelbuilder.v2.ModelGenerationValidator;
> +
> +/**
> + * {@link ModelGenerationValidator} that accepts every sentence and rejects a
> + * named entity when it contains a digit or when its lower-cased form is on a
> + * fixed blacklist of stop words and punctuation.
> + *
> + * @author Owner
> + */
> +public class ModelValidatorImpl implements ModelGenerationValidator {
> +
> +    /** Matches a single ASCII digit; compiled once instead of on every validation call. */
> +    private static final Pattern DIGIT = Pattern.compile("[0-9]");
> +
> +    /**
> +     * Blacklisted terms as authored. They are lower-cased when loaded because
> +     * lookups are done on the lower-cased candidate; storing mixed-case terms
> +     * such as "Above" or "Ltd" verbatim would make them impossible to match.
> +     */
> +    private static final String[] BLACKLIST_TERMS = {
> +        ".", "-", ",", ";", "the", "that", "several", "model", "our", "are",
> +        "in", "at", "is", "for", "during", "south", "from", "recounts",
> +        "wissenschaftliches", "if", "security", "denouncing", "writes", "but",
> +        "operation", "adds", "Above", "RIP", "on", "no", "agrees", "year",
> +        "you", "red", "added", "hello", "around", "has", "turn", "surrounding",
> +        "\" No", "aug.", "or", "quips", "september", "[mr", "diseases", "when",
> +        "bbc", ":\"", "dr", "baby", "route", "'", "\"", "a", "her", "two", ":",
> +        "one", "Party", "Championship", "Ltd"
> +    };
> +
> +    /** Lower-cased blacklist; filled on first use by {@link #getBlackList()}. */
> +    private final Set<String> badentities = new HashSet<String>();
> +    private final double MIN_SCORE_FOR_TRAINING = 0.95d;
> +    private Object validationData;
> +    /** Configuration parameters; kept non-null by {@link #setParameters(Map)}. */
> +    private Map<String, String> params = new HashMap<String, String>();
> +
> +    /**
> +     * Stores the configuration parameters for this validator.
> +     *
> +     * @param params the parameters to keep; {@code null} resets them to an empty map
> +     */
> +    @Override
> +    public void setParameters(Map<String, String> params) {
> +        this.params = (params != null) ? params : new HashMap<String, String>();
> +    }
> +
> +    /**
> +     * Accepts every sentence.
> +     *
> +     * @param sentence the sentence to check (not inspected)
> +     * @return always {@code true}
> +     */
> +    @Override
> +    public Boolean validSentence(String sentence) {
> +        // Returning true by default, because the sentence provider will return
> +        // only "valid" sentences in this case.
> +        return true;
> +    }
> +
> +    /**
> +     * Decides whether a candidate named entity may be used.
> +     *
> +     * @param namedEntity the candidate entity text
> +     * @return {@code false} when the candidate is {@code null}, contains an
> +     *         ASCII digit, or matches a blacklisted term ignoring case;
> +     *         {@code true} otherwise
> +     */
> +    @Override
> +    public Boolean validNamedEntity(String namedEntity) {
> +        if (namedEntity == null) {
> +            return false;
> +        }
> +        if (badentities.isEmpty()) {
> +            getBlackList();
> +        }
> +        if (DIGIT.matcher(namedEntity).find()) {
> +            return false;
> +        }
> +        // Locale.ROOT keeps the case mapping independent of the JVM default locale.
> +        return !badentities.contains(namedEntity.toLowerCase(java.util.Locale.ROOT));
> +    }
> +
> +    /**
> +     * Loads the blacklist terms (lower-cased) into the internal set and returns it.
> +     *
> +     * @return the internal, mutable blacklist set
> +     */
> +    @Override
> +    public Set<String> getBlackList() {
> +        for (String term : BLACKLIST_TERMS) {
> +            badentities.add(term.toLowerCase(java.util.Locale.ROOT));
> +        }
> +        return badentities;
> +    }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,137 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedOutputStream;
> +import java.io.File;
> +import java.io.FileInputStream;
> +import java.io.FileOutputStream;
> +import java.io.FileWriter;
> +import java.io.IOException;
> +import java.io.OutputStream;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.Modelable;
> +import opennlp.tools.namefind.NameFinderME;
> +import opennlp.tools.namefind.NameSample;
> +import opennlp.tools.namefind.NameSampleDataStream;
> +import opennlp.tools.namefind.TokenNameFinderModel;
> +import opennlp.tools.tokenize.TokenizerME;
> +import opennlp.tools.tokenize.TokenizerModel;
> +import opennlp.tools.util.ObjectStream;
> +import opennlp.tools.util.PlainTextByLineStream;
> +
> +/**
> + * {@link Modelable} that collects annotated sentences, writes them to a
> + * training file under a configurable directory, trains a name finder model
> + * from that file and loads the serialized model back.
> + *
> + * @author Owner
> + */
> +public class ModelableImpl implements Modelable {
> +
> +    private static final Logger LOG = Logger.getLogger(ModelableImpl.class.getName());
> +    /** Charset used for both writing and reading the training file. */
> +    private static final Charset UTF8 = Charset.forName("UTF-8");
> +    /** Training file name, appended to {@code path}. */
> +    private static final String TRAINING_FILE = "en-ner-person.train";
> +    /** Serialized model file name, appended to {@code path}. */
> +    private static final String MODEL_FILE = "en-ner-person.train.model";
> +
> +    // Currently unused; tokenizeSentenceToWords splits on single spaces instead.
> +    private TokenizerModel tm;
> +    private TokenizerME wordBreaker;
> +    /** Directory prefix for the training and model files; must end with a separator. */
> +    private String path = "c:\\temp\\opennlpmodels\\";
> +    private Set<String> annotatedSentences = new HashSet<String>();
> +    private Map<String, String> params = new HashMap<String, String>();
> +
> +    /**
> +     * Stores the configuration parameters and picks up the output directory
> +     * from the {@code "modelablepath"} entry.
> +     *
> +     * @param params the parameters to keep; {@code null} resets them to an
> +     *        empty map. When {@code "modelablepath"} is absent the current
> +     *        path is kept instead of being overwritten with {@code null}.
> +     */
> +    @Override
> +    public void setParameters(Map<String, String> params) {
> +        this.params = (params != null) ? params : new HashMap<String, String>();
> +        String configuredPath = this.params.get("modelablepath");
> +        if (configuredPath != null) {
> +            path = configuredPath;
> +        }
> +    }
> +
> +    /**
> +     * Wraps every occurrence of the entity in the sentence with name finder
> +     * training tags of the form {@code <START:type> entity <END>}.
> +     *
> +     * @param sentence the sentence to annotate
> +     * @param namedEntity the literal entity text to tag
> +     * @param entityType the type written into the START tag
> +     * @return the annotated sentence
> +     */
> +    @Override
> +    public String annotate(String sentence, String namedEntity, String entityType) {
> +        return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
> +    }
> +
> +    /**
> +     * Writes all collected annotated sentences, one per line, to the training
> +     * file, replacing any previous content. The file is written as UTF-8 so it
> +     * matches the charset {@link #buildModel(String)} reads it with. I/O
> +     * failures are logged rather than thrown.
> +     */
> +    @Override
> +    public void writeAnnotatedSentences() {
> +        java.io.Writer writer = null;
> +        try {
> +            writer = new java.io.OutputStreamWriter(new FileOutputStream(path + TRAINING_FILE, false), UTF8);
> +            for (String s : annotatedSentences) {
> +                writer.write(s.replace("\n", "").trim() + "\n");
> +            }
> +            writer.flush();
> +        } catch (IOException ex) {
> +            LOG.log(Level.SEVERE, "Could not write training file " + path + TRAINING_FILE, ex);
> +        } finally {
> +            closeQuietly(writer);
> +        }
> +    }
> +
> +    /**
> +     * @return the internal, mutable set of annotated sentences
> +     */
> +    @Override
> +    public Set<String> getAnnotatedSentences() {
> +        return annotatedSentences;
> +    }
> +
> +    /**
> +     * Replaces the set of annotated sentences.
> +     *
> +     * @param annotatedSentences the new set to use
> +     */
> +    @Override
> +    public void setAnnotatedSentences(Set<String> annotatedSentences) {
> +        this.annotatedSentences = annotatedSentences;
> +    }
> +
> +    /**
> +     * Adds one annotated sentence; {@code null} is ignored.
> +     *
> +     * @param annotatedSentence the sentence to add
> +     */
> +    @Override
> +    public void addAnnotatedSentence(String annotatedSentence) {
> +        if (annotatedSentence != null) {
> +            annotatedSentences.add(annotatedSentence);
> +        }
> +    }
> +
> +    /**
> +     * Trains a name finder model from the training file and serializes it to
> +     * the model file. Failures are logged rather than thrown.
> +     *
> +     * NOTE(review): {@code entityType} is not used; the training call and the
> +     * file names are fixed to "person" - confirm whether that is intended.
> +     *
> +     * @param entityType the entity type requested by the caller (unused)
> +     */
> +    @Override
> +    public void buildModel(String entityType) {
> +        System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
> +        System.out.println("\t\treading training data...");
> +        ObjectStream<NameSample> sampleStream = null;
> +        OutputStream modelOut = null;
> +        try {
> +            ObjectStream<String> lineStream =
> +                    new PlainTextByLineStream(new FileInputStream(path + TRAINING_FILE), UTF8);
> +            sampleStream = new NameSampleDataStream(lineStream);
> +
> +            TokenNameFinderModel model = NameFinderME.train("en", "person", sampleStream, null);
> +
> +            modelOut = new BufferedOutputStream(new FileOutputStream(new File(path + MODEL_FILE)));
> +            model.serialize(modelOut);
> +            // Close here so a failed flush is reported before success is announced.
> +            modelOut.close();
> +            System.out.println("\tmodel generated");
> +        } catch (Exception e) {
> +            LOG.log(Level.SEVERE, "Could not build model from " + path + TRAINING_FILE, e);
> +        } finally {
> +            if (sampleStream != null) {
> +                try {
> +                    sampleStream.close();
> +                } catch (IOException ex) {
> +                    LOG.log(Level.WARNING, "Could not close training data stream", ex);
> +                }
> +            }
> +            closeQuietly(modelOut);
> +        }
> +    }
> +
> +    /**
> +     * Loads the serialized model from the model file.
> +     *
> +     * @return the loaded model, or {@code null} when it could not be read
> +     *         (the failure is logged)
> +     */
> +    @Override
> +    public TokenNameFinderModel getModel() {
> +        TokenNameFinderModel nerModel = null;
> +        FileInputStream modelIn = null;
> +        try {
> +            modelIn = new FileInputStream(new File(path + MODEL_FILE));
> +            nerModel = new TokenNameFinderModel(modelIn);
> +        } catch (IOException ex) {
> +            LOG.log(Level.SEVERE, null, ex);
> +        } finally {
> +            closeQuietly(modelIn);
> +        }
> +        return nerModel;
> +    }
> +
> +    /**
> +     * Splits a sentence into words on single space characters.
> +     *
> +     * @param sentence the sentence to split
> +     * @return the space-separated tokens
> +     */
> +    @Override
> +    public String[] tokenizeSentenceToWords(String sentence) {
> +        return sentence.split(" ");
> +    }
> +
> +    /** Closes the resource if it is non-null, logging instead of throwing on failure. */
> +    private static void closeQuietly(java.io.Closeable resource) {
> +        if (resource == null) {
> +            return;
> +        }
> +        try {
> +            resource.close();
> +        } catch (IOException ex) {
> +            LOG.log(Level.WARNING, "Could not close resource", ex);
> +        }
> +    }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,78 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.sql.*;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.modelbuilder.v2.SentenceProvider;
> +
> +/**
> + * {@link SentenceProvider} that reads training sentences from MySQL by
> + * calling the stored procedure {@code getTrainingSentences()} and caches them
> + * in memory.
> + *
> + * @author Owner
> + */
> +public class MySQLSentenceProviderImpl implements SentenceProvider {
> +
> +    private static final java.util.logging.Logger LOG =
> +            java.util.logging.Logger.getLogger(MySQLSentenceProviderImpl.class.getName());
> +
> +    /** Cached sentences; filled from the database while the set is empty. */
> +    Set<String> sentences = new HashSet<String>();
> +
> +    /** Configuration parameters; kept non-null by {@link #setParameters(Map)}. */
> +    private Map<String, String> params = new HashMap<String, String>();
> +
> +    /**
> +     * Returns the cached sentences, querying the database first while the
> +     * cache is empty. A failed query is logged and the current (possibly
> +     * empty) cache is returned.
> +     *
> +     * @return the internal, mutable set of sentences
> +     */
> +    @Override
> +    public Set<String> getSentences() {
> +        try {
> +            if (sentences.isEmpty()) {
> +                return getData();
> +            }
> +        } catch (Exception e) {
> +            LOG.log(java.util.logging.Level.SEVERE, "Could not load training sentences", e);
> +        }
> +        return sentences;
> +    }
> +
> +    /**
> +     * Runs {@code CALL getTrainingSentences()} and adds the first column of
> +     * every row to the cache. The result set, statement and connection are
> +     * always released, also when the call fails.
> +     *
> +     * @return the cache after loading
> +     * @throws ClassNotFoundException if the JDBC driver class cannot be loaded
> +     * @throws SQLException if connecting or querying fails
> +     */
> +    private Set<String> getData() throws ClassNotFoundException, SQLException {
> +        Connection con = getMySqlConnection();
> +        CallableStatement cs = null;
> +        ResultSet rs = null;
> +        try {
> +            cs = con.prepareCall("CALL getTrainingSentences()");
> +            rs = cs.executeQuery();
> +            while (rs.next()) {
> +                sentences.add(rs.getString(1));
> +            }
> +        } finally {
> +            // Release in reverse order of acquisition; a failed close must not
> +            // hide the outcome of the query itself.
> +            if (rs != null) {
> +                try {
> +                    rs.close();
> +                } catch (SQLException ex) {
> +                    LOG.log(java.util.logging.Level.WARNING, "Could not close result set", ex);
> +                }
> +            }
> +            if (cs != null) {
> +                try {
> +                    cs.close();
> +                } catch (SQLException ex) {
> +                    LOG.log(java.util.logging.Level.WARNING, "Could not close statement", ex);
> +                }
> +            }
> +            try {
> +                con.close();
> +            } catch (SQLException ex) {
> +                LOG.log(java.util.logging.Level.WARNING, "Could not close connection", ex);
> +            }
> +        }
> +        return sentences;
> +    }
> +
> +    /**
> +     * Opens a JDBC connection to the MySQL schema "wink" on 127.0.0.1:3306.
> +     *
> +     * NOTE(review): the JDBC URL, user name and password are hard-coded in
> +     * this method; confirm whether they should come from external configuration.
> +     *
> +     * @return the connection handed back by {@link DriverManager}
> +     * @throws ClassNotFoundException if the driver class cannot be loaded
> +     * @throws SQLException if the connection cannot be established
> +     */
> +    private static Connection getMySqlConnection() throws ClassNotFoundException, SQLException {
> +        String driver = "org.gjt.mm.mysql.Driver";
> +        String url = "jdbc:mysql://127.0.0.1:3306/wink";
> +        String username = "root";
> +        String password = "559447";
> +
> +        Class.forName(driver);
> +        return DriverManager.getConnection(url, username, password);
> +    }
> +
> +    /**
> +     * Stores the configuration parameters for this provider.
> +     *
> +     * @param params the parameters to keep; {@code null} resets them to an empty map
> +     */
> +    @Override
> +    public void setParameters(Map<String, String> params) {
> +        this.params = (params != null) ? params : new HashMap<String, String>();
> +    }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,98 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + * {@link KnownEntityProvider} for entities of type "person", seeded with a
> + * fixed list of names the first time the entities are requested while the
> + * set is still empty.
> + *
> + * @author Owner
> + */
> +public class PersonKnownEntityProviderImpl implements KnownEntityProvider {
> +
> +    /** Names loaded into {@code ret} by {@link #getKnownEntities()} when it is empty. */
> +    private static final String[] SEED_NAMES = {
> +        "Barack Obama", "Mitt Romney", "John Doe", "Bill Gates",
> +        "Nguyen Tan Dung", "Hassanal Bolkiah", "Bashar al-Assad",
> +        "Faysal Khabbaz Hamou", "Dr Talwar", "Mr. Bolkiah", "Bashar",
> +        "Romney", "Obama", "the President", "Mr. Gates",
> +        "Xi Jinping", "Leon Panetta", "Paul Beales", "Mr Rajapaksa",
> +        "Mohammed ", "Ieng Thirith", "Mr Xi", "John Sudworth",
> +        "Aung San Suu Kyi",
> +        "Khorshid", "Karrie Webb", "Doyle McManus", "Pope John Paul",
> +        "Roland Buerk", "Paul Ryan", "Tammy Baldwin", "Ben Unger",
> +        "Chris Christie", "Mary Magdalene", "George Walker Bush",
> +        "Melendez-Martinez", "Osiel Cardenas Guillen", "President Molina",
> +        "Lubaina Himid", "Elizabeth Frink", "Graham Sutherland",
> +        "Gorman Adams", "Peter Sheasby", "Andrew Walker",
> +        "Elias Garcia Martinez", "Elias Martinez"
> +    };
> +
> +    Set<String> ret = new HashSet<String>();
> +
> +    private Map<String, String> params = new HashMap<String,String>();
> +
> +    /**
> +     * Returns the known person names, loading the seed names first when the
> +     * set is empty.
> +     *
> +     * @return the internal, mutable set of known entities
> +     */
> +    @Override
> +    public Set<String> getKnownEntities() {
> +        if (!ret.isEmpty()) {
> +            return ret;
> +        }
> +        for (String seedName : SEED_NAMES) {
> +            ret.add(seedName);
> +        }
> +        return ret;
> +    }
> +
> +    /**
> +     * @return the fixed type label {@code "person"}
> +     */
> +    @Override
> +    public String getKnownEntitiesType() {
> +        final String entityType = "person";
> +        return entityType;
> +    }
> +
> +    /**
> +     * Adds one more entity to the set of known entities.
> +     *
> +     * @param unambiguousEntity the entity text to store
> +     */
> +    @Override
> +    public void addKnownEntity(String unambiguousEntity) {
> +        this.ret.add(unambiguousEntity);
> +    }
> +
> +    /**
> +     * Stores the configuration parameters for this provider.
> +     *
> +     * @param params the parameters to keep
> +     */
> +    @Override
> +    public void setParameters(Map<String, String> params) {
> +        this.params = params;
> +    }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,38 @@
> +package modelbuilder;
> +
> +import junit.framework.Test;
> +import junit.framework.TestCase;
> +import junit.framework.TestSuite;
> +
> +/**
> + * Placeholder unit test for the simple App.
> + */
> +public class AppTest extends TestCase {
> +
> +    /**
> +     * Creates the test case.
> +     *
> +     * @param testName name of the test case
> +     */
> +    public AppTest( String testName ) {
> +        super( testName );
> +    }
> +
> +    /**
> +     * @return a suite containing the tests of this class
> +     */
> +    public static Test suite() {
> +        return new TestSuite( AppTest.class );
> +    }
> +
> +    /**
> +     * Rigorous test :-) - asserts a constant {@code true}.
> +     */
> +    public void testApp() {
> +        assertTrue( true );
> +    }
> +}
>
>