You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@opennlp.apache.org by Jörn Kottmann <ko...@gmail.com> on 2013/10/21 14:16:11 UTC

Re: svn commit: r1533883 - in /opennlp/sandbox/modelbuilder-prototype: ./ src/ src/main/ src/main/java/ src/main/java/modelbuilder/ src/main/java/opennlp/ src/main/java/opennlp/modelbuilder/ src/main/java/opennlp/modelbuilder/v2/ src/main/java/opennlp/mode...

Hello,

All files that are checked in should have the Apache License (AL) header.

Can you please add the header to these files?
Have a look at the source files in opennlp-tools and just copy their 
headers over.

Some IDEs allow you to configure the header; it will then be inserted
automatically whenever a new class is created.

HTH,
Jörn


On 10/20/2013 03:00 PM, markg@apache.org wrote:
> Author: markg
> Date: Sun Oct 20 13:00:17 2013
> New Revision: 1533883
>
> URL: http://svn.apache.org/r1533883
> Log:
> Prototype of a tool that lets users create models from a set of known entities, based on their own data in the form of sentences.
> See the Example class in the .v2 package.
>
> Added:
>      opennlp/sandbox/modelbuilder-prototype/pom.xml
>      opennlp/sandbox/modelbuilder-prototype/src/
>      opennlp/sandbox/modelbuilder-prototype/src/main/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java
>      opennlp/sandbox/modelbuilder-prototype/src/main/resources/
>      opennlp/sandbox/modelbuilder-prototype/src/test/
>      opennlp/sandbox/modelbuilder-prototype/src/test/java/
>      opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/
>      opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java
>
> Added: opennlp/sandbox/modelbuilder-prototype/pom.xml
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/pom.xml?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/pom.xml (added)
> +++ opennlp/sandbox/modelbuilder-prototype/pom.xml Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,30 @@
> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
> +  <modelVersion>4.0.0</modelVersion>
> +
> +  <groupId>modelbuilder</groupId>
> +  <artifactId>modelbuilder-prototype</artifactId>
> +  <version>1.0-SNAPSHOT</version>
> +  <packaging>jar</packaging>
> +
> +  <name>modelbuilder-prototype</name>
> +  <url>http://maven.apache.org</url>
> +
> +  <properties>
> +    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
> +  </properties>
> +
> +  <dependencies>
> +    <dependency>
> +      <groupId>junit</groupId>
> +      <artifactId>junit</artifactId>
> +      <version>3.8.1</version>
> +      <scope>test</scope>
> +    </dependency>
> +      <dependency>
> +      <groupId>org.apache.opennlp</groupId>
> +      <artifactId>opennlp-tools</artifactId>
> +      <version>1.6.0-SNAPSHOT</version>
> +    </dependency>
> +  </dependencies>
> +</project>
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/modelbuilder/App.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,13 @@
> +package modelbuilder;
> +
> +/**
> + * Placeholder entry point left over from the generated project skeleton.
> + * It performs no work.
> + */
> +public class App {
> +
> +    /**
> +     * Does nothing.
> +     *
> +     * @param args command-line arguments; ignored
> +     */
> +    public static void main(String[] args) {
> +    }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,67 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
> +import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
> +import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
> +import opennlp.modelbuilder.v2.impls.ModelableImpl;
> +
> +/**
> + * Walk-through of the model builder prototype: configures the file-based
> + * implementations through one shared parameter map and then runs the
> + * generator for three iterations.
> + */
> +public class Example {
> +
> +  public static void main(String[] args) {
> +
> +    // Every component receives its configuration through a map; the keys
> +    // below are the ones required by the current file-based implementations.
> +    Map<String, String> settings = new HashMap<String, String>();
> +    settings.put("sentencesfile", "/the/file");
> +    settings.put("knownentityfile", "/the/file");
> +    settings.put("knownentitytype", "person");
> +    settings.put("blacklistfile", "/the/file");
> +    settings.put("modelablepath", "/the/file");
> +
> +    // Sentence providers feed the process with sentences derived from the
> +    // user's data. This implementation reads a file line by line.
> +    SentenceProvider sentences = new FileSentenceProvider();
> +    sentences.setParameters(settings);
> +
> +    // Known entity providers supply a seed list of known entities, such as
> +    // "Barack Obama" for person or "Germany" for location. The seeds should
> +    // be prolific, unambiguous names.
> +    KnownEntityProvider seeds = new FileKnownEntityProvider();
> +    seeds.setParameters(settings);
> +
> +    // Validators try to weed out bad hits produced by the name finder.
> +    // Because every iteration builds on the previous one, the name finder
> +    // gets greedier whenever a bad entity slips through, so this is the hook
> +    // for rejecting obviously wrong hits. A good implementation might check
> +    // that a location sits inside a noun phrase; users can make the check as
> +    // specific as their data and use case require.
> +    ModelGenerationValidator hitFilter = new FileModelValidatorImpl();
> +    hitFilter.setParameters(settings);
> +
> +    // Modelables read and write the annotated sentences, and create and
> +    // write the NER models.
> +    Modelable corpus = new ModelableImpl();
> +    corpus.setParameters(settings);
> +
> +    // The generator runs the process for a fixed number of iterations. It
> +    // could instead measure the difference between runs and stop at a
> +    // threshold, but that may be too expensive for extremely large sentence
> +    // sets.
> +    GenericModelGenerator generator = new GenericModelGenerator();
> +    generator.build(sentences, seeds, hitFilter, corpus, 3);
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/GenericModelGenerator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,70 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +import opennlp.tools.namefind.NameFinderME;
> +import opennlp.tools.util.Span;
> +
> +/**
> + * Iterative model generator. Each iteration annotates every sentence that
> + * contains a known entity, trains a name finder model on those annotations,
> + * and then runs that model over the sentences to collect further entities,
> + * which become known entities for the next iteration.
> + */
> +public class GenericModelGenerator implements SemiSupervisedModelGenerator {
> +
> +  private Map<String, String> params = new HashMap<String, String>();
> +
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +
> +  /**
> +   * Runs the annotate / train / find cycle {@code iterations} times and
> +   * finishes by writing the annotated sentences and building the model once
> +   * more. Progress is printed to standard output.
> +   *
> +   * @param sentenceProvider source of the sentences to process
> +   * @param knownEntityProvider seed entities; entities accepted by the validator are added to it
> +   * @param validator decides which sentences and which found entities are accepted
> +   * @param modelable annotates sentences, stores them and builds the model
> +   * @param iterations number of cycles to run
> +   */
> +  @Override
> +  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
> +          ModelGenerationValidator validator, Modelable modelable, int iterations) {
> +    int round = 0;
> +    while (round < iterations) {
> +      System.out.println("ITERATION: " + round);
> +      System.out.println("\tPerfoming Known Entity Annotation");
> +      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
> +      System.out.println("\t\treading data....: ");
> +      annotateKnownEntities(sentenceProvider, knownEntityProvider, modelable);
> +
> +      System.out.println("\t\twriting annotated sentences....: ");
> +      modelable.writeAnnotatedSentences();
> +      modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
> +      NameFinderME finder = new NameFinderME(modelable.getModel());
> +      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
> +      System.out.println("\tPerforming NER");
> +      harvestNewEntities(finder, sentenceProvider, knownEntityProvider, validator, modelable);
> +
> +      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
> +      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
> +      round++;
> +    }
> +    modelable.writeAnnotatedSentences();
> +    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
> +  }
> +
> +  /** Annotates every sentence once for each known entity it contains. */
> +  private void annotateKnownEntities(SentenceProvider sentenceProvider,
> +          KnownEntityProvider knownEntityProvider, Modelable modelable) {
> +    for (String text : sentenceProvider.getSentences()) {
> +      for (String seed : knownEntityProvider.getKnownEntities()) {
> +        if (!text.contains(seed)) {
> +          continue;
> +        }
> +        // Open question from the original author: should a sentence with
> +        // several hits be annotated separately for each hit?
> +        String annotated = modelable.annotate(text, seed, knownEntityProvider.getKnownEntitiesType());
> +        modelable.addAnnotatedSentence(annotated);
> +      }
> +    }
> +  }
> +
> +  /**
> +   * Runs the name finder over every valid sentence; each valid entity it
> +   * finds is added to the known entities and its sentence is annotated.
> +   */
> +  private void harvestNewEntities(NameFinderME finder, SentenceProvider sentenceProvider,
> +          KnownEntityProvider knownEntityProvider, ModelGenerationValidator validator,
> +          Modelable modelable) {
> +    for (String text : sentenceProvider.getSentences()) {
> +      if (validator.validSentence(text)) {
> +        String[] words = modelable.tokenizeSentenceToWords(text);
> +
> +        Span[] hits = finder.find(words);
> +        // Adaptive data is cleared after every sentence.
> +        finder.clearAdaptiveData();
> +
> +        for (String candidate : Span.spansToStrings(hits, words)) {
> +          if (!validator.validNamedEntity(candidate)) {
> +            continue;
> +          }
> +          knownEntityProvider.addKnownEntity(candidate);
> +          String annotated = modelable.annotate(text, candidate, knownEntityProvider.getKnownEntitiesType());
> +          modelable.addAnnotatedSentence(annotated);
> +        }
> +      }
> +    }
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/KnownEntityProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,35 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.List;
> +import java.util.Set;
> +
> +
> +
> +/**
> + * Supplies the set of known entities of a single type (for example person
> + * or location) that seeds model generation. The set is expected to grow:
> + * GenericModelGenerator adds every entity that the name finder discovers
> + * and the validator accepts.
> + *
> + * @author Owner
> + */
> +public interface KnownEntityProvider extends ModelParameter{
> +  /**
> + * Returns the known, unambiguous entities collected so far.
> + *
> + * @return the set of known entities
> + */
> +  Set<String> getKnownEntities();
> +/**
> + * Adds an entity to the set of known entities. Implementing classes should
> + * hold the entities in a class-level set so that additions persist between calls.
> + *
> + * @param unambiguousEntity the entity name to add
> + */
> +  void addKnownEntity(String unambiguousEntity);
> +/**
> + * Returns the type of entity that the set contains, e.g. person, location
> + * or organization.
> + *
> + * @return the entity type name
> + */
> +  String getKnownEntitiesType();
> +
> +
> +
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelGenerationValidator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,23 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.Collection;
> +import java.util.Set;
> +
> +/**
> + * Filter applied during model generation to keep bad data out of the
> + * training loop.
> + *
> + * validSentence(sentence): GenericModelGenerator runs the name finder only
> + * on sentences for which this returns true.
> + *
> + * validNamedEntity(namedEntity): an entity found by the name finder is added
> + * to the known entities, and its sentence annotated, only when this returns
> + * true.
> + *
> + * getBlackList(): returns the collection of rejected entity names; the
> + * file-based implementation fills it from the file named by the
> + * "blacklistfile" parameter.
> + *
> + * Both validation methods return a boxed Boolean that GenericModelGenerator
> + * unboxes in an if condition, so implementations must not return null.
> + *
> + * @author Owner
> + */
> +public interface ModelGenerationValidator extends ModelParameter {
> +
> +  Boolean validSentence(String sentence);
> +
> +  Boolean validNamedEntity(String namedEntity);
> +
> +
> +
> +  Collection<String> getBlackList();
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/ModelParameter.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,17 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +/**
> + * Common super-interface of all model builder components. It gives every
> + * component one way to receive its configuration: a map of string keys to
> + * string values passed to setParameters. The file-based implementations
> + * look up their input files in this map, e.g. under "sentencesfile",
> + * "knownentityfile" and "blacklistfile".
> + *
> + * @author Owner
> + */
> +public interface ModelParameter {
> +
> +  void setParameters(Map<String, String> params);
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Modelable.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,37 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.List;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.tools.namefind.TokenNameFinderModel;
> +
> +/**
> + * Holds the annotated training sentences and the name finder model built
> + * from them. As used by GenericModelGenerator:
> + *
> + * annotate(sentence, namedEntity, entityType) returns the annotated form of
> + * the sentence, which the generator passes on to addAnnotatedSentence. The
> + * annotation format is up to the implementation.
> + *
> + * writeAnnotatedSentences() is called immediately before each buildModel call.
> + *
> + * getAnnotatedSentences() / setAnnotatedSentences(...) / addAnnotatedSentence(...)
> + * read, replace and extend the set of annotated sentences.
> + *
> + * buildModel(entityType) builds the model for the given entity type, and
> + * getModel() returns the model that the generator hands to NameFinderME.
> + *
> + * tokenizeSentenceToWords(sentence) returns the tokens that the generator
> + * passes to the name finder.
> + *
> + * @author Owner
> + */
> +public interface Modelable extends ModelParameter{
> +
> +
> +
> +  String annotate(String sentence, String namedEntity, String entityType);
> +
> +  void writeAnnotatedSentences();
> +
> +  Set<String> getAnnotatedSentences();
> +
> +  void setAnnotatedSentences(Set<String> annotatedSentences);
> +
> +  void addAnnotatedSentence(String annotatedSentence);
> +
> +  void buildModel( String entityType);
> +
> +  TokenNameFinderModel getModel();
> +
> +  String[] tokenizeSentenceToWords(String sentence);
> +
> +
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SemiSupervisedModelGenerator.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,15 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +/**
> + * Drives model generation from a sentence source and a seed set of known
> + * entities.
> + *
> + * build(...) receives the sentence provider, the known entity provider, the
> + * validator used to reject bad sentences and entities, the Modelable that
> + * stores annotations and builds the model, and the number of iterations to
> + * run.
> + *
> + * @author Owner
> + */
> +public interface SemiSupervisedModelGenerator extends ModelParameter {
> +
> +  void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
> +          ModelGenerationValidator validator, Modelable modelable, int iterations);
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/SentenceProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,16 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2;
> +
> +import java.util.Set;
> +
> +/**
> + * Source of the sentences that model generation works on.
> + *
> + * getSentences() returns the full set of sentences. GenericModelGenerator
> + * iterates over this set twice in every iteration, so implementations
> + * should make repeated calls cheap (the file-based implementation caches
> + * the set after the first read).
> + *
> + * @author Owner
> + */
> +public interface SentenceProvider extends ModelParameter {
> +
> +  Set<String> getSentences();
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileKnownEntityProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,74 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + * {@link KnownEntityProvider} that seeds its entities from a UTF-8 text file
> + * with one entity per line. The file path is read from the
> + * "knownentityfile" parameter and the entity type from "knownentitytype".
> + *
> + * The file is read lazily on the first call to {@link #getKnownEntities()}
> + * that finds the set empty. Read failures are logged and leave the set as it is.
> + */
> +public class FileKnownEntityProvider implements KnownEntityProvider {
> +
> +  private static final Logger LOG = Logger.getLogger(FileKnownEntityProvider.class.getName());
> +
> +  private Map<String, String> params = new HashMap<String, String>();
> +  Set<String> knownEntities = new HashSet<String>();
> +
> +  /**
> +   * Returns the known entities, loading them from the configured file first
> +   * if none are held yet.
> +   *
> +   * @return the live, mutable set of known entities
> +   */
> +  @Override
> +  public Set<String> getKnownEntities() {
> +    if (knownEntities.isEmpty()) {
> +      loadKnownEntities();
> +    }
> +    return knownEntities;
> +  }
> +
> +  /** Reads the configured file line by line into the entity set. */
> +  private void loadKnownEntities() {
> +    String path = params.get("knownentityfile");
> +    if (path == null) {
> +      LOG.log(Level.SEVERE, "Parameter 'knownentityfile' is not set; no known entities loaded");
> +      return;
> +    }
> +    BufferedReader br = null;
> +    try {
> +      InputStream fis = new FileInputStream(path);
> +      br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> +      String line;
> +      while ((line = br.readLine()) != null) {
> +        knownEntities.add(line);
> +      }
> +    } catch (IOException ex) {
> +      // FileNotFoundException is an IOException, so a missing file lands here too.
> +      LOG.log(Level.SEVERE, "Unable to read known entities from " + path, ex);
> +    } finally {
> +      // Close in finally so the file handle is released even when reading fails.
> +      if (br != null) {
> +        try {
> +          br.close();
> +        } catch (IOException ex) {
> +          LOG.log(Level.WARNING, "Unable to close " + path, ex);
> +        }
> +      }
> +    }
> +  }
> +
> +  @Override
> +  public void addKnownEntity(String unambiguousEntity) {
> +    knownEntities.add(unambiguousEntity);
> +  }
> +
> +  /**
> +   * @return the value of the "knownentitytype" parameter, or null if it is not set
> +   */
> +  @Override
> +  public String getKnownEntitiesType() {
> +    return params.get("knownentitytype");
> +  }
> +
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileModelValidatorImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,88 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.Collection;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import java.util.regex.Pattern;
> +import opennlp.modelbuilder.v2.ModelGenerationValidator;
> +
> +/**
> + *
> + * @author Owner
> + */
> +public class FileModelValidatorImpl implements ModelGenerationValidator {
> +
> +  private Set<String> badentities = new HashSet<String>();
> +  private final double MIN_SCORE_FOR_TRAINING = 0.95d;
> +  private Object validationData;
> +  private Map<String, String> params = new HashMap<String, String>();
> +
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +
> +  @Override
> +  public Boolean validSentence(String sentence) {
> +    //returning true by default, because the sentence provider will  return only "valid" sentences in this case
> +    return true;
> +  }
> +
> +  @Override
> +  public Boolean validNamedEntity(String namedEntity) {
> +
> +    if (badentities.isEmpty()) {
> +      getBlackList();
> +    }
> +
> +    Pattern p = Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
> +    if (p.matcher(namedEntity).find()) {
> +      return false;
> +    }
> +    Boolean b = true;
> +    if (badentities.contains(namedEntity.toLowerCase())) {
> +      b = false;
> +    }
> +    return b;
> +  }
> +
> +  @Override
> +  public Collection<String> getBlackList() {
> +    if (!badentities.isEmpty()) {
> +      try {
> +        InputStream fis;
> +        BufferedReader br;
> +        String line;
> +
> +        fis = new FileInputStream(params.get("blacklistfile"));
> +        br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> +        while ((line = br.readLine()) != null) {
> +          badentities.add(line);
> +        }
> +        br.close();
> +        br = null;
> +        fis = null;
> +      } catch (FileNotFoundException ex) {
> +        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> +      } catch (IOException ex) {
> +        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
> +      }
> +    }
> +    return badentities;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/FileSentenceProvider.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,60 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedReader;
> +import java.io.FileInputStream;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.SentenceProvider;
> +
> +/**
> + * {@link SentenceProvider} that reads its sentences from a UTF-8 text file
> + * with one sentence per line. The file path is read from the
> + * "sentencesfile" parameter.
> + *
> + * The file is read lazily on the first call to {@link #getSentences()} that
> + * finds the set empty. Read failures are logged and leave the set as it is.
> + */
> +public class FileSentenceProvider implements SentenceProvider {
> +
> +  private static final Logger LOG = Logger.getLogger(FileSentenceProvider.class.getName());
> +
> +  private Map<String, String> params = new HashMap<String, String>();
> +  Set<String> sentences = new HashSet<String>();
> +
> +  /**
> +   * Returns the sentences, loading them from the configured file first if
> +   * none are held yet.
> +   *
> +   * @return the live, mutable set of sentences
> +   */
> +  @Override
> +  public Set<String> getSentences() {
> +    if (sentences.isEmpty()) {
> +      loadSentences();
> +    }
> +    return sentences;
> +  }
> +
> +  /** Reads the configured file line by line into the sentence set. */
> +  private void loadSentences() {
> +    String path = params.get("sentencesfile");
> +    if (path == null) {
> +      LOG.log(Level.SEVERE, "Parameter 'sentencesfile' is not set; no sentences loaded");
> +      return;
> +    }
> +    BufferedReader br = null;
> +    try {
> +      InputStream fis = new FileInputStream(path);
> +      br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
> +      String line;
> +      while ((line = br.readLine()) != null) {
> +        sentences.add(line);
> +      }
> +    } catch (IOException ex) {
> +      // FileNotFoundException is an IOException, so a missing file lands here too.
> +      LOG.log(Level.SEVERE, "Unable to read sentences from " + path, ex);
> +    } finally {
> +      // Close in finally so the file handle is released even when reading fails.
> +      if (br != null) {
> +        try {
> +          br.close();
> +        } catch (IOException ex) {
> +          LOG.log(Level.WARNING, "Unable to close " + path, ex);
> +        }
> +      }
> +    }
> +  }
> +
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/LocationKnownEntityProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,93 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.sql.CallableStatement;
> +import java.sql.Connection;
> +import java.sql.DriverManager;
> +import java.sql.ResultSet;
> +import java.sql.SQLException;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + * {@link KnownEntityProvider} for the "location" type that seeds its
> + * entities from a MySQL database by calling the stored procedure
> + * {@code getcountrylist()} and reading the {@code full_name_nd_ro} column
> + * of each row.
> + *
> + * The database is queried lazily on the first call to
> + * {@link #getKnownEntities()} that finds the set empty. Failures are logged
> + * and leave the set as it is.
> + */
> +public class LocationKnownEntityProviderImpl implements KnownEntityProvider {
> +
> +  private static final Logger LOG = Logger.getLogger(LocationKnownEntityProviderImpl.class.getName());
> +
> +  // NOTE(review): params is stored but not read anywhere in this class.
> +  private Map<String, String> params = new HashMap<String, String>();
> +  Set<String> ret = new HashSet<String>();
> +
> +  /**
> +   * Returns the known locations, querying the database first if none are
> +   * held yet.
> +   *
> +   * @return the live, mutable set of known locations
> +   */
> +  @Override
> +  public Set<String> getKnownEntities() {
> +    if (ret.isEmpty()) {
> +      try {
> +        getData();
> +      } catch (Exception ex) {
> +        // Best effort: a failed lookup is logged and the set stays as it was.
> +        LOG.log(Level.SEVERE, "Unable to load known locations from the database", ex);
> +      }
> +    }
> +    return ret;
> +  }
> +
> +  /**
> +   * Runs the stored procedure and adds every returned name to the set.
> +   *
> +   * @return the set of known locations
> +   * @throws ClassNotFoundException if the JDBC driver class cannot be loaded
> +   * @throws SQLException if connecting or querying fails
> +   */
> +  private Set<String> getData() throws ClassNotFoundException, SQLException {
> +    Connection con = null;
> +    CallableStatement cs = null;
> +    ResultSet rs = null;
> +    try {
> +      con = getMySqlConnection();
> +      if (con.isClosed()) {
> +        con = getMySqlConnection();
> +      }
> +      cs = con.prepareCall("CALL getcountrylist()");
> +      rs = cs.executeQuery();
> +      while (rs.next()) {
> +        ret.add(rs.getString("full_name_nd_ro"));
> +      }
> +    } finally {
> +      // Release in reverse order of acquisition. Close failures are only
> +      // logged so they cannot mask an exception from the query itself.
> +      if (rs != null) {
> +        try {
> +          rs.close();
> +        } catch (SQLException ex) {
> +          LOG.log(Level.WARNING, "Unable to close result set", ex);
> +        }
> +      }
> +      if (cs != null) {
> +        try {
> +          cs.close();
> +        } catch (SQLException ex) {
> +          LOG.log(Level.WARNING, "Unable to close statement", ex);
> +        }
> +      }
> +      if (con != null) {
> +        try {
> +          con.close();
> +        } catch (SQLException ex) {
> +          LOG.log(Level.WARNING, "Unable to close connection", ex);
> +        }
> +      }
> +    }
> +    return ret;
> +  }
> +
> +  /**
> +   * Opens a connection to the local MySQL "world" database.
> +   *
> +   * NOTE(review): driver, URL, user name and password are hard-coded and the
> +   * password is committed in clear text. They should come from
> +   * configuration and the committed password should be rotated.
> +   *
> +   * @return a new database connection
> +   * @throws ClassNotFoundException if the JDBC driver class cannot be loaded
> +   * @throws SQLException if the connection cannot be established
> +   */
> +  private static Connection getMySqlConnection() throws ClassNotFoundException, SQLException {
> +    String driver = "org.gjt.mm.mysql.Driver";
> +    String url = "jdbc:mysql://127.0.0.1:3306/world";
> +    String username = "root";
> +    String password = "559447";
> +
> +    Class.forName(driver);
> +    return DriverManager.getConnection(url, username, password);
> +  }
> +
> +  @Override
> +  public String getKnownEntitiesType() {
> +    return "location";
> +  }
> +
> +  @Override
> +  public void addKnownEntity(String unambiguousEntity) {
> +    ret.add(unambiguousEntity);
> +  }
> +
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelValidatorImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,131 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.regex.Pattern;
> +import opennlp.modelbuilder.v2.ModelGenerationValidator;
> +
> +/**
> + * {@link ModelGenerationValidator} that accepts every sentence and rejects
> + * named entities that contain a digit or appear in a fixed black list.
> + * <p>
> + * Candidates are lower-cased before the black list lookup, so every black
> + * list entry must be stored in lower case.
> + *
> + * @author Owner
> + */
> +public class ModelValidatorImpl implements ModelGenerationValidator {
> +
> +  /** Matches any ASCII digit; compiled once instead of on every call. */
> +  private static final Pattern DIGIT = Pattern.compile("[0-9]");
> +
> +  /**
> +   * Default black list entries. All entries are lower case because
> +   * {@link #validNamedEntity(String)} lower-cases the candidate before the
> +   * lookup; mixed-case entries could never match.
> +   */
> +  private static final String[] DEFAULT_BLACK_LIST = {
> +    ".", "-", ",", ";", "the", "that", "several", "model", "our", "are",
> +    "in", "at", "is", "for", "during", "south", "from", "recounts",
> +    "wissenschaftliches", "if", "security", "denouncing", "writes", "but",
> +    "operation", "adds", "above", "rip", "on", "no", "agrees", "year",
> +    "you", "red", "added", "hello", "around", "has", "turn", "surrounding",
> +    "\" no", "aug.", "or", "quips", "september", "[mr", "diseases", "when",
> +    "bbc", ":\"", "dr", "baby", "route", "'", "\"", "a", "her", "two", ":",
> +    "one", "party", "championship", "ltd"
> +  };
> +
> +  private Set<String> badentities = new HashSet<String>();
> +  private final double MIN_SCORE_FOR_TRAINING = 0.95d;
> +  private Object validationData;
> +  private Map<String, String> params = new HashMap<String, String>();
> +
> +  /**
> +   * Stores the given parameters; this class does not read them.
> +   *
> +   * @param params the parameter map to keep
> +   */
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +
> +  /**
> +   * Accepts every sentence, because the sentence provider is expected to
> +   * return only "valid" sentences in this case.
> +   *
> +   * @param sentence the sentence to check (not inspected)
> +   * @return always <code>true</code>
> +   */
> +  @Override
> +  public Boolean validSentence(String sentence) {
> +    return true;
> +  }
> +
> +  /**
> +   * Checks whether a candidate may be used as a named entity.
> +   *
> +   * @param namedEntity the candidate text
> +   * @return <code>false</code> if the candidate is <code>null</code>,
> +   * contains a digit, or its lower-cased form is black listed;
> +   * <code>true</code> otherwise
> +   */
> +  @Override
> +  public Boolean validNamedEntity(String namedEntity) {
> +    if (namedEntity == null) {
> +      return false;
> +    }
> +    if (badentities.isEmpty()) {
> +      getBlackList();
> +    }
> +    if (DIGIT.matcher(namedEntity).find()) {
> +      return false;
> +    }
> +    return !badentities.contains(namedEntity.toLowerCase());
> +  }
> +
> +  /**
> +   * Adds the default entries to the black list and returns it.
> +   *
> +   * @return the black list set held by this validator (not a copy)
> +   */
> +  @Override
> +  public Set<String> getBlackList() {
> +    for (String entry : DEFAULT_BLACK_LIST) {
> +      badentities.add(entry);
> +    }
> +    return badentities;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/ModelableImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,137 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.io.BufferedOutputStream;
> +import java.io.File;
> +import java.io.FileInputStream;
> +import java.io.FileOutputStream;
> +import java.io.FileWriter;
> +import java.io.IOException;
> +import java.io.OutputStream;
> +import java.nio.charset.Charset;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.logging.Level;
> +import java.util.logging.Logger;
> +import opennlp.modelbuilder.v2.Modelable;
> +import opennlp.tools.namefind.NameFinderME;
> +import opennlp.tools.namefind.NameSample;
> +import opennlp.tools.namefind.NameSampleDataStream;
> +import opennlp.tools.namefind.TokenNameFinderModel;
> +import opennlp.tools.tokenize.TokenizerME;
> +import opennlp.tools.tokenize.TokenizerModel;
> +import opennlp.tools.util.ObjectStream;
> +import opennlp.tools.util.PlainTextByLineStream;
> +
> +/**
> + * {@link Modelable} that keeps annotated sentences in memory, writes them to
> + * a training file below a configurable directory prefix, and trains and
> + * reloads a name finder model from that location.
> + *
> + * @author Owner
> + */
> +public class ModelableImpl implements Modelable {
> +
> +  private static final Logger LOG = Logger.getLogger(ModelableImpl.class.getName());
> +  /** Name of the training data file, appended to {@link #path}. */
> +  private static final String TRAINING_FILE = "en-ner-person.train";
> +  /** Name of the serialized model file, appended to {@link #path}. */
> +  private static final String MODEL_FILE = "en-ner-person.train.model";
> +
> +  // Not used while tokenizeSentenceToWords() splits on blanks.
> +  private TokenizerModel tm;
> +  private TokenizerME wordBreaker;
> +  private String path = "c:\\temp\\opennlpmodels\\";
> +  private Set<String> annotatedSentences = new HashSet<String>();
> +  private Map<String, String> params = new HashMap<String, String>();
> +
> +  /**
> +   * Stores the parameters and, if the key "modelablepath" is present, uses
> +   * its value as the path prefix for the training and model files.
> +   *
> +   * @param params the parameter map; <code>null</code> is ignored
> +   */
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    if (params == null) {
> +      return;
> +    }
> +    this.params = params;
> +    String configuredPath = params.get("modelablepath");
> +    // Keep the default when the key is missing; a null prefix would turn
> +    // into the literal text "null" in every file name built from it.
> +    if (configuredPath != null) {
> +      path = configuredPath;
> +    }
> +  }
> +
> +  /**
> +   * Wraps every occurrence of the entity text in the sentence with
> +   * <code>&lt;START:type&gt;</code> and <code>&lt;END&gt;</code> markers.
> +   *
> +   * @param sentence the sentence to annotate
> +   * @param namedEntity the entity text to mark
> +   * @param entityType the type written into the start marker
> +   * @return the annotated sentence
> +   */
> +  @Override
> +  public String annotate(String sentence, String namedEntity, String entityType) {
> +    return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
> +  }
> +
> +  /**
> +   * Writes all annotated sentences to the training file, one per line,
> +   * replacing any previous content. Failures are logged.
> +   */
> +  @Override
> +  public void writeAnnotatedSentences() {
> +    java.io.Writer writer = null;
> +    try {
> +      // Written as UTF-8 because buildModel() reads the file back as UTF-8.
> +      writer = new java.io.OutputStreamWriter(
> +              new FileOutputStream(path + TRAINING_FILE, false), Charset.forName("UTF-8"));
> +      for (String s : annotatedSentences) {
> +        writer.write(s.replace("\n", "").trim() + "\n");
> +      }
> +    } catch (IOException ex) {
> +      LOG.log(Level.SEVERE, "Could not write training data to " + path + TRAINING_FILE, ex);
> +    } finally {
> +      if (writer != null) {
> +        try {
> +          writer.close();
> +        } catch (IOException ex) {
> +          LOG.log(Level.WARNING, "Could not close " + path + TRAINING_FILE, ex);
> +        }
> +      }
> +    }
> +  }
> +
> +  /**
> +   * @return the set of annotated sentences held by this object (not a copy)
> +   */
> +  @Override
> +  public Set<String> getAnnotatedSentences() {
> +    return annotatedSentences;
> +  }
> +
> +  /**
> +   * Replaces the set of annotated sentences.
> +   *
> +   * @param annotatedSentences the new set
> +   */
> +  @Override
> +  public void setAnnotatedSentences(Set<String> annotatedSentences) {
> +    this.annotatedSentences = annotatedSentences;
> +  }
> +
> +  /**
> +   * Adds one annotated sentence; <code>null</code> is ignored.
> +   *
> +   * @param annotatedSentence the sentence to add
> +   */
> +  @Override
> +  public void addAnnotatedSentence(String annotatedSentence) {
> +    if (annotatedSentence != null) {
> +      annotatedSentences.add(annotatedSentence);
> +    }
> +  }
> +
> +  /**
> +   * Trains a name finder model from the training file and serializes it to
> +   * the model file. Failures are logged.
> +   *
> +   * @param entityType currently not used
> +   */
> +  @Override
> +  public void buildModel(String entityType) {
> +    try {
> +      System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
> +      System.out.println("\t\treading training data...");
> +      Charset charset = Charset.forName("UTF-8");
> +      ObjectStream<String> lineStream =
> +              new PlainTextByLineStream(new FileInputStream(path + TRAINING_FILE), charset);
> +      ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
> +
> +      TokenNameFinderModel model;
> +      try {
> +        // NOTE(review): file names and the "person" type are fixed although
> +        // an entityType is passed in - confirm whether this is intended.
> +        model = NameFinderME.train("en", "person", sampleStream, null);
> +      } finally {
> +        sampleStream.close();
> +      }
> +
> +      OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(new File(path + MODEL_FILE)));
> +      try {
> +        model.serialize(modelOut);
> +      } finally {
> +        modelOut.close();
> +      }
> +      System.out.println("\tmodel generated");
> +    } catch (Exception e) {
> +      LOG.log(Level.SEVERE, "Could not build model from " + path + TRAINING_FILE, e);
> +    }
> +  }
> +
> +  /**
> +   * Loads the serialized model from the model file.
> +   *
> +   * @return the model, or <code>null</code> if it could not be read (the
> +   * failure is logged)
> +   */
> +  @Override
> +  public TokenNameFinderModel getModel() {
> +    TokenNameFinderModel nerModel = null;
> +    FileInputStream modelIn = null;
> +    try {
> +      modelIn = new FileInputStream(new File(path + MODEL_FILE));
> +      nerModel = new TokenNameFinderModel(modelIn);
> +    } catch (IOException ex) {
> +      LOG.log(Level.SEVERE, null, ex);
> +    } finally {
> +      if (modelIn != null) {
> +        try {
> +          modelIn.close();
> +        } catch (IOException ex) {
> +          LOG.log(Level.WARNING, "Could not close " + path + MODEL_FILE, ex);
> +        }
> +      }
> +    }
> +    return nerModel;
> +  }
> +
> +  /**
> +   * Splits a sentence into words at every single space character.
> +   *
> +   * @param sentence the sentence to split
> +   * @return the resulting tokens
> +   */
> +  @Override
> +  public String[] tokenizeSentenceToWords(String sentence) {
> +    return sentence.split(" ");
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/MySQLSentenceProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,78 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.sql.*;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.modelbuilder.v2.SentenceProvider;
> +
> +/**
> + * {@link SentenceProvider} that reads training sentences from the
> + * <code>getTrainingSentences()</code> stored procedure of a MySQL database
> + * and keeps them in memory.
> + *
> + * @author Owner
> + */
> +public class MySQLSentenceProviderImpl implements SentenceProvider {
> +
> +  Set<String> sentences = new HashSet<String>();
> +  private Map<String, String> params = new HashMap<String, String>();
> +
> +  /**
> +   * Returns the sentences, loading them from the database first if none are
> +   * cached yet.
> +   *
> +   * @return the cached set of sentences; a load failure is logged and
> +   * whatever was collected so far (possibly nothing) is returned
> +   */
> +  @Override
> +  public Set<String> getSentences() {
> +    if (sentences.isEmpty()) {
> +      try {
> +        getData();
> +      } catch (Exception e) {
> +        // Logged instead of silently dropped, so an empty result can be
> +        // traced back to its cause.
> +        java.util.logging.Logger.getLogger(MySQLSentenceProviderImpl.class.getName())
> +                .log(java.util.logging.Level.SEVERE, "Could not load training sentences", e);
> +      }
> +    }
> +    return sentences;
> +  }
> +
> +  /**
> +   * Calls the <code>getTrainingSentences()</code> stored procedure and adds
> +   * the first column of every row to the cached set.
> +   *
> +   * @return the cached set, after the rows have been added
> +   * @throws ClassNotFoundException if the JDBC driver class cannot be loaded
> +   * @throws SQLException if connecting, querying or closing fails
> +   */
> +  private Set<String> getData() throws ClassNotFoundException, SQLException {
> +    Connection con = getMySqlConnection();
> +    CallableStatement cs = null;
> +    ResultSet rs = null;
> +    try {
> +      // Prepared inside the try so the connection is released even if
> +      // preparing the call fails.
> +      cs = con.prepareCall("CALL getTrainingSentences()");
> +      rs = cs.executeQuery();
> +      while (rs.next()) {
> +        sentences.add(rs.getString(1));
> +      }
> +    } finally {
> +      // Release in reverse order of acquisition; the nested finally blocks
> +      // make sure a failing close() does not skip the remaining ones.
> +      try {
> +        if (rs != null) {
> +          rs.close();
> +        }
> +      } finally {
> +        try {
> +          if (cs != null) {
> +            cs.close();
> +          }
> +        } finally {
> +          con.close();
> +        }
> +      }
> +    }
> +    return sentences;
> +  }
> +
> +  /**
> +   * Opens a new connection to the "wink" schema of the local MySQL server.
> +   *
> +   * @return a freshly opened connection; the caller must close it
> +   * @throws ClassNotFoundException if the JDBC driver class cannot be loaded
> +   * @throws SQLException if the connection cannot be established
> +   */
> +  private static Connection getMySqlConnection() throws ClassNotFoundException, SQLException {
> +    // NOTE(review): driver, URL and credentials are hard-coded; they should
> +    // be moved to external configuration before this leaves the sandbox.
> +    String driver = "org.gjt.mm.mysql.Driver";
> +    String url = "jdbc:mysql://127.0.0.1:3306/wink";
> +    String username = "root";
> +    String password = "559447";
> +
> +    Class.forName(driver);
> +    return DriverManager.getConnection(url, username, password);
> +  }
> +
> +  /**
> +   * Stores the given parameters; this class does not read them.
> +   *
> +   * @param params the parameter map to keep
> +   */
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/impls/PersonKnownEntityProviderImpl.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,98 @@
> +/*
> + * To change this template, choose Tools | Templates
> + * and open the template in the editor.
> + */
> +package opennlp.modelbuilder.v2.impls;
> +
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Map;
> +import java.util.Set;
> +import opennlp.modelbuilder.v2.KnownEntityProvider;
> +
> +/**
> + * {@link KnownEntityProvider} for entities of type "person", seeded from a
> + * fixed list of names the first time the set is requested while empty.
> + *
> + * @author Owner
> + */
> +public class PersonKnownEntityProviderImpl implements KnownEntityProvider {
> +
> +  /** Seed names, in the order in which they are added to the set. */
> +  private static final String[] SEED_NAMES = {
> +    "Barack Obama",
> +    "Mitt Romney",
> +    "John Doe",
> +    "Bill Gates",
> +    "Nguyen Tan Dung",
> +    "Hassanal Bolkiah",
> +    "Bashar al-Assad",
> +    "Faysal Khabbaz Hamou",
> +    "Dr Talwar",
> +    "Mr. Bolkiah",
> +    "Bashar",
> +    "Romney",
> +    "Obama",
> +    "the President",
> +    "Mr. Gates",
> +    "Romney",
> +    "Xi Jinping",
> +    "Hassanal Bolkiah",
> +    "Leon Panetta",
> +    "Paul Beales",
> +    "Mr Rajapaksa",
> +    "Mohammed ",
> +    "Ieng Thirith",
> +    "Mr Xi",
> +    "John Sudworth",
> +    "Ieng Thirith",
> +    "Aung San Suu Kyi",
> +    "Khorshid",
> +    "Karrie Webb",
> +    "Doyle McManus",
> +    "Pope John Paul",
> +    "Roland Buerk",
> +    "Paul Ryan",
> +    "Tammy Baldwin",
> +    "Ben Unger",
> +    "Chris Christie",
> +    "Mary Magdalene",
> +    "George Walker Bush",
> +    "Melendez-Martinez",
> +    "Osiel Cardenas Guillen",
> +    "President Molina",
> +    "Lubaina Himid",
> +    "Elizabeth Frink",
> +    "Graham Sutherland",
> +    "Gorman Adams",
> +    "Peter Sheasby",
> +    "Andrew Walker",
> +    "Elias Garcia Martinez",
> +    "Elias Martinez"
> +  };
> +
> +  Set<String> ret = new HashSet<String>();
> +  private Map<String, String> params = new HashMap<String,String>();
> +
> +  /**
> +   * Returns the known person names, adding the seed names first when the
> +   * set is still empty.
> +   *
> +   * @return the set of names held by this provider (not a copy)
> +   */
> +  @Override
> +  public Set<String> getKnownEntities() {
> +    if (!ret.isEmpty()) {
> +      return ret;
> +    }
> +    for (int i = 0; i < SEED_NAMES.length; i++) {
> +      ret.add(SEED_NAMES[i]);
> +    }
> +    return ret;
> +  }
> +
> +  /**
> +   * @return the entity type handled by this provider, always "person"
> +   */
> +  @Override
> +  public String getKnownEntitiesType() {
> +    return "person";
> +  }
> +
> +  /**
> +   * Adds a single entity name to the set.
> +   *
> +   * @param unambiguousEntity the entity name to add
> +   */
> +  @Override
> +  public void addKnownEntity(String unambiguousEntity) {
> +    ret.add(unambiguousEntity);
> +  }
> +
> +  /**
> +   * Stores the given parameters; this class does not read them.
> +   *
> +   * @param params the parameter map to keep
> +   */
> +  @Override
> +  public void setParameters(Map<String, String> params) {
> +    this.params = params;
> +  }
> +}
>
> Added: opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java
> URL: http://svn.apache.org/viewvc/opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java?rev=1533883&view=auto
> ==============================================================================
> --- opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java (added)
> +++ opennlp/sandbox/modelbuilder-prototype/src/test/java/modelbuilder/AppTest.java Sun Oct 20 13:00:17 2013
> @@ -0,0 +1,38 @@
> +package modelbuilder;
> +
> +import junit.framework.Test;
> +import junit.framework.TestCase;
> +import junit.framework.TestSuite;
> +
> +/**
> + * Placeholder JUnit 3 test case for the App class; its only test asserts a
> + * constant and therefore always passes.
> + */
> +public class AppTest extends TestCase {
> +
> +    /**
> +     * Creates the test case.
> +     *
> +     * @param testName name of the test case, handed to {@link TestCase}
> +     */
> +    public AppTest(String testName) {
> +        super(testName);
> +    }
> +
> +    /**
> +     * Builds the suite that contains the tests of this class.
> +     *
> +     * @return a suite created from {@code AppTest.class}
> +     */
> +    public static Test suite() {
> +        return new TestSuite(AppTest.class);
> +    }
> +
> +    /**
> +     * Trivial test that always succeeds.
> +     */
> +    public void testApp() {
> +        assertTrue(true);
> +    }
> +}
>
>