You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2013/11/06 12:47:38 UTC

svn commit: r1539319 - in /opennlp/sandbox/apache-opennlp-addons: ./ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/opennlp/ src/main/java/org/apache/opennlp/addons/ src/main/java/org/apache/opennlp/...

Author: markg
Date: Wed Nov  6 11:47:37 2013
New Revision: 1539319

URL: http://svn.apache.org/r1539319
Log:
OPENNLP-614
Moved all GeoEntityLinker impl classes to sandbox. Called this module addons as a place to consolidate useful addons
to the base opennlp modules.

Added:
    opennlp/sandbox/apache-opennlp-addons/pom.xml
    opennlp/sandbox/apache-opennlp-addons/src/
    opennlp/sandbox/apache-opennlp-addons/src/main/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextHit.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/FuzzyStringMatchScorer.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerEntry.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoHashBinningScorer.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/LinkedEntityScorer.java
    opennlp/sandbox/apache-opennlp-addons/src/test/
    opennlp/sandbox/apache-opennlp-addons/src/test/java/
    opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/
    opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/
    opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/
    opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/AppTest.java
Modified:
    opennlp/sandbox/apache-opennlp-addons/   (props changed)

Propchange: opennlp/sandbox/apache-opennlp-addons/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Nov  6 11:47:37 2013
@@ -0,0 +1 @@
+target

Added: opennlp/sandbox/apache-opennlp-addons/pom.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/pom.xml?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/pom.xml (added)
+++ opennlp/sandbox/apache-opennlp-addons/pom.xml Wed Nov  6 11:47:37 2013
@@ -0,0 +1,61 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>apache-opennlp-addons</groupId>
+  <artifactId>apache-opennlp-addons</artifactId>
+  <version>1.0-SNAPSHOT</version>
+  <packaging>jar</packaging>
+<name>Apache OpenNLP Addons</name>
+
+  <url>http://maven.apache.org</url>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>2.3.2</version>
+                <configuration>
+                    <source>1.7</source>
+                    <target>1.7</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+    <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+       <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-core</artifactId>
+      <version>4.5.0</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-common</artifactId>
+      <version>4.5.0</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-queryparser</artifactId>
+      <version>4.5.0</version>
+      <optional>true</optional>
+    </dependency>
+      <dependency>
+      <groupId>org.apache.opennlp</groupId>
+      <artifactId>opennlp-tools</artifactId>
+      <version>1.6.0-SNAPSHOT</version>
+      <optional>true</optional>
+    </dependency>
+  </dependencies>
+</project>

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.sql.CallableStatement;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import opennlp.tools.entitylinker.EntityLinkerProperties;
+
+/**
+ * Finds instances of country mentions in a String, typically a document text.
+ * Used to boost or degrade scoring of linked geo entities
+ *
+ */
+public class CountryContext {
+
+  private Connection con;
+  private List<CountryContextEntry> countrydata;
+  private Map<String, Set<String>> nameCodesMap = new HashMap<String, Set<String>>();
+  private Map<String, Set<Integer>> countryMentions = new HashMap<String, Set<Integer>>();
+  private Set<CountryContextEntry> countryHits = new HashSet<>();
+  /**
+   * Returns the map filled by {@link #regexfind}: each key is the lower-cased
+   * text of a match, each value the set of country codes whose pattern
+   * produced that match. The internal map itself is returned, not a copy.
+   */
+  public Map<String, Set<String>> getNameCodesMap() {
+    return nameCodesMap;
+  }
+  /**
+   * Replaces the name-to-country-codes map held by this instance.
+   *
+   * @param nameCodesMap the map to use from now on
+   */
+  public void setNameCodesMap(Map<String, Set<String>> nameCodesMap) {
+    this.nameCodesMap = nameCodesMap;
+  }
+  /**
+   * Creates an empty context; the country list is only loaded on the first
+   * call to {@link #regexfind}.
+   */
+  public CountryContext() {
+  }
+
+
+  /**
+   * Finds country mentions in a document, which is an essential element of the
+   * scoring that is done for geo linkedspans. Every entry of the country list
+   * is compiled as a case-insensitive regular expression and matched against
+   * the text; the start offset of each match is recorded under the entry's
+   * lower-cased country code. The country list is loaded lazily, on first use,
+   * from the country context file.
+   * <p>
+   * All per-document state (country mentions, name-to-codes map and country
+   * hits) is reset at the start of every call, so results of a previous
+   * document never leak into the current one.
+   *
+   * @param docText    the full text of the document
+   * @param properties EntityLinkerProperties used to locate the country list
+   * @return a map of lower-cased country code to the start offsets of its
+   *         mentions in {@code docText}. Failures are logged rather than
+   *         thrown; whatever was found before the failure is still returned
+   */
+  public Map<String, Set<Integer>> regexfind(String docText, EntityLinkerProperties properties) {
+    countryMentions = new HashMap<String, Set<Integer>>();
+    nameCodesMap.clear();
+    // the hits used to pile up across calls while the two collections above
+    // were reset; start from a fresh set so all three describe the same document
+    countryHits = new HashSet<CountryContextEntry>();
+    try {
+      if (countrydata == null) {
+        countrydata = getCountryContextFromFile(properties);
+      }
+      for (CountryContextEntry entry : countrydata) {
+        Pattern regex = Pattern.compile(entry.getFull_name_nd_ro(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
+        Matcher rs = regex.matcher(docText);
+        String code = entry.getCc1().toLowerCase();
+
+        boolean found = false;
+        while (rs.find()) {
+          found = true;
+          Integer start = rs.start();
+          String hit = rs.group().toLowerCase();
+
+          Set<Integer> starts = countryMentions.get(code);
+          if (starts == null) {
+            starts = new HashSet<Integer>();
+            countryMentions.put(code, starts);
+          }
+          starts.add(start);
+
+          // an empty match carries no name, so it cannot be mapped to a code
+          if (!hit.isEmpty()) {
+            Set<String> codes = nameCodesMap.get(hit);
+            if (codes == null) {
+              codes = new HashSet<String>();
+              nameCodesMap.put(hit, codes);
+            }
+            codes.add(code);
+          }
+        }
+        if (found) {
+          countryHits.add(entry);
+        }
+      }
+    } catch (Exception ex) {
+      Logger.getLogger(CountryContext.class.getName()).log(Level.SEVERE, null, ex);
+    }
+
+    return countryMentions;
+  }
+
+  /**
+   * returns a unique list of country codes
+   *
+   * @param countryMentions the countryMentions discovered
+   * @return
+   */
+  public static Set<String> getCountryCodes(List<CountryContextHit> hits) {
+    Set<String> ccs = new HashSet<String>();
+    for (CountryContextHit hit : hits) {
+      ccs.add(hit.getCountryCode().toLowerCase());
+    }
+    return ccs;
+  }
+
+  public static String getCountryCodeCSV(Set<String> hits) {
+    String csv = "";
+    if (hits.isEmpty()) {
+      return csv;
+    }
+
+    for (String code : hits) {
+      csv += "," + code;
+    }
+    return csv.substring(1);
+  }
+
+  /**
+   * Opens a JDBC connection configured through the {@code db.driver},
+   * {@code db.url}, {@code db.username} and {@code db.password} properties;
+   * each one has a built-in default that is used when the property is missing.
+   *
+   * @param properties source of the connection settings
+   * @return a newly opened connection
+   * @throws Exception if the driver class cannot be loaded or the connection
+   *                   cannot be established
+   */
+  private Connection getMySqlConnection(EntityLinkerProperties properties) throws Exception {
+    String driverClass = properties.getProperty("db.driver", "org.gjt.mm.mysql.Driver");
+    String jdbcUrl = properties.getProperty("db.url", "jdbc:mysql://localhost:3306/world");
+    String user = properties.getProperty("db.username", "root");
+    String secret = properties.getProperty("db.password", "?");
+
+    // load the JDBC driver class before asking the DriverManager for a connection
+    Class.forName(driverClass);
+    return DriverManager.getConnection(jdbcUrl, user, secret);
+  }
+
+  /**
+   * Reads the country list from the database by calling the stored procedure
+   * {@code getCountryList}. The connection is opened on demand and closed
+   * again before returning. Failures while reading are reported on
+   * {@code System.err} and whatever was read up to that point is returned.
+   *
+   * @param properties source of the database connection settings
+   * @return the entries read; possibly empty, never {@code null}
+   * @throws SQLException if closing the connection fails
+   */
+  private List<CountryContextEntry> getCountryData(EntityLinkerProperties properties) throws SQLException {
+    List<CountryContextEntry> entries = new ArrayList<CountryContextEntry>();
+    try {
+      if (con == null) {
+        con = getMySqlConnection(properties);
+      }
+      // try-with-resources releases the statement and the result set, which
+      // were previously never closed
+      try (CallableStatement cs = con.prepareCall("CALL `getCountryList`()");
+              ResultSet rs = cs.executeQuery()) {
+        while (rs != null && rs.next()) {
+          CountryContextEntry s = new CountryContextEntry();
+          // column order: rc, cc1, full_name_nd_ro, dsg
+          s.setRc(rs.getString(1));
+          s.setCc1(rs.getString(2));
+          s.setFull_name_nd_ro(rs.getString(3));
+          s.setDsg(rs.getString(4));
+          entries.add(s);
+        }
+      }
+    } catch (Exception ex) {
+      System.err.println(ex);
+    } finally {
+      // con is still null when the connection could not be opened; closing it
+      // unguarded would throw a NullPointerException on top of the real error
+      if (con != null) {
+        try {
+          con.close();
+        } finally {
+          // forget the closed connection so that a later call opens a new one
+          con = null;
+        }
+      }
+    }
+    return entries;
+  }
+  /**
+   * Returns the mentions found by the most recent call to {@link #regexfind}:
+   * lower-cased country code mapped to the start offsets of its matches.
+   */
+  public Map<String, Set<Integer>> getCountryMentions() {
+    return countryMentions;
+  }
+  /**
+   * Returns the country list entries whose pattern matched at least once
+   * during {@link #regexfind}.
+   */
+  public Set<CountryContextEntry> getCountryHits() {
+    return countryHits;
+  }
+
+  /**
+   * Loads the country list from the tab separated file named by the
+   * {@code opennlp.geoentitylinker.countrycontext.filepath} property. Every
+   * non-empty line must hold exactly four columns, in the order rc, cc1,
+   * full_name_nd_ro, dsg; all values are lower-cased.
+   * <p>
+   * Problems (missing file, malformed line) are reported on
+   * {@code System.err}; the entries read before the problem occurred are still
+   * returned.
+   *
+   * @param properties source of the file path
+   * @return the entries read; possibly empty, never {@code null}
+   */
+  private List<CountryContextEntry> getCountryContextFromFile(EntityLinkerProperties properties) {
+    List<CountryContextEntry> entries = new ArrayList<>();
+
+    try {
+      String path = properties.getProperty("opennlp.geoentitylinker.countrycontext.filepath", "");
+
+      // try-with-resources closes the reader even when a line is malformed
+      try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
+        String line;
+        // readLine() alone drives the loop: probing for end of file with read()
+        // swallowed the first character of every line and could leave
+        // readLine() returning null
+        while ((line = reader.readLine()) != null) {
+          if (line.isEmpty()) {
+            continue;
+          }
+          String[] values = line.split("\t");
+          if (values.length != 4) {
+            throw new IOException("improperly formatted country context file");
+          }
+          CountryContextEntry entry = new CountryContextEntry();
+          // rc,cc1, full_name_nd_ro,dsg
+          entry.setRc(values[0].toLowerCase());
+          entry.setCc1(values[1].toLowerCase());
+          entry.setFull_name_nd_ro(values[2].toLowerCase());
+          entry.setDsg(values[3].toLowerCase());
+          entries.add(entry);
+        }
+      }
+    } catch (IOException e) {
+      System.err.println(e);
+    }
+    return entries;
+
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.Objects;
+
+/**
+ *Stores a tuple from mysql that is used to find country mentions in document text.
+ *
+ */
+public class CountryContextEntry {
+  /*
+   * rc,cc1, full_name_nd_ro,dsg
+   */
+
+  private String rc;
+  private String cc1;
+  private String full_name_nd_ro;
+  private String dsg;
+
+  public CountryContextEntry() {
+  }
+
+  public CountryContextEntry(String rc, String cc1, String full_name_nd_ro, String dsg) {
+    this.rc = rc;
+    this.cc1 = cc1;
+    this.full_name_nd_ro = full_name_nd_ro;
+    this.dsg = dsg;
+  }
+
+  public String getRc() {
+    return rc;
+  }
+
+  public void setRc(String rc) {
+    this.rc = rc;
+  }
+
+  public String getCc1() {
+    return cc1;
+  }
+
+  public void setCc1(String cc1) {
+    this.cc1 = cc1;
+  }
+
+  public String getFull_name_nd_ro() {
+    return full_name_nd_ro;
+  }
+
+  public void setFull_name_nd_ro(String full_name_nd_ro) {
+    this.full_name_nd_ro = full_name_nd_ro;
+  }
+
+  public String getDsg() {
+    return dsg;
+  }
+
+  public void setDsg(String dsg) {
+    this.dsg = dsg;
+  }
+
+  @Override
+  public int hashCode() {
+    int hash = 7;
+    hash = 17 * hash + Objects.hashCode(this.rc);
+    hash = 17 * hash + Objects.hashCode(this.cc1);
+    hash = 17 * hash + Objects.hashCode(this.full_name_nd_ro);
+    hash = 17 * hash + Objects.hashCode(this.dsg);
+    return hash;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final CountryContextEntry other = (CountryContextEntry) obj;
+    if (!Objects.equals(this.rc, other.rc)) {
+      return false;
+    }
+    if (!Objects.equals(this.cc1, other.cc1)) {
+      return false;
+    }
+    if (!Objects.equals(this.full_name_nd_ro, other.full_name_nd_ro)) {
+      return false;
+    }
+    if (!Objects.equals(this.dsg, other.dsg)) {
+      return false;
+    }
+    return true;
+  }
+  
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextHit.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextHit.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextHit.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextHit.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+/**
+ * Stores a "hit" on a country: the country code together with the start and
+ * the end of the hit.
+ */
+public class CountryContextHit {
+
+  private String countryCode;
+  private int start;
+  private int end;
+  /** Creates an empty hit; populate it through the setters. */
+  public CountryContextHit() {
+  }
+  /**
+   * Creates a fully populated hit.
+   *
+   * @param countryCode the code of the country that was hit
+   * @param start       where the hit starts
+   * @param end         where the hit ends
+   */
+  public CountryContextHit(String countryCode, int start, int end) {
+    this.countryCode = countryCode;
+    this.start = start;
+    this.end = end;
+  }
+  /** @return the code of the country that was hit */
+  public String getCountryCode() {
+    return countryCode;
+  }
+  /** @param countryCode the code of the country that was hit */
+  public void setCountryCode(String countryCode) {
+    this.countryCode = countryCode;
+  }
+  /** @return where the hit starts */
+  public int getStart() {
+    return start;
+  }
+  /** @param start where the hit starts */
+  public void setStart(int start) {
+    this.start = start;
+  }
+  /** @return where the hit ends */
+  public int getEnd() {
+    return end;
+  }
+  /** @param end where the hit ends */
+  public void setEnd(int end) {
+    this.end = end;
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import opennlp.tools.entitylinker.domain.BaseLink;
+import opennlp.tools.entitylinker.domain.LinkedSpan;
+import opennlp.tools.util.Span;
+
+/**
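+ * Scores toponyms based on country context: how close each named entity is,
+ * within the document text, to the country mentions found there. Fuzzy string
+ * matching is scored separately by FuzzyStringMatchScorer.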
+ */
+public class CountryProximityScorer implements LinkedEntityScorer<CountryContext> {
+
+  private Map<String, Set<String>> nameCodesMap;
+  String dominantCode = "";
+
+  @Override
+  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, CountryContext additionalContext) {
+    /*
+     * Delegates to the six-argument score method, taking the country mentions
+     * and the name-to-codes map from the given context and passing 1000 as
+     * maxAllowedDist.
+     */
+    score(linkedSpans, additionalContext.getCountryMentions(), additionalContext.getNameCodesMap(), docText, sentenceSpans, 1000);
+
+  }
+
+  /**
+   * Assigns a "countrycontext" score to every BaseLink of every linked span.
+   * The score expresses how likely the toponym is correct given the country
+   * mentions found in the document.
+   *
+   * @param linkedData     the linked spans; each holds the name finder result
+   *                       and its list of BaseLink candidates
+   * @param countryHits    all country mentions in the document, as country
+   *                       code mapped to the offsets of its mentions
+   * @param nameCodesMap   maps a matched country name to its country codes.
+   *                       Used to detect that the name finder found the very
+   *                       toponym the country context found, in which case the
+   *                       score is boosted
+   * @param docText        the full text of the document; not used by this
+   *                       implementation
+   * @param sentences      the sentences that correspond to the doc text
+   * @param maxAllowedDist meant to limit which country mentions, based on
+   *                       proximity within the text, are used for scoring
+   * @return the same list that was passed in, its links now carrying scores
+   */
+  public List<LinkedSpan> score(List<LinkedSpan> linkedData, Map<String, Set<Integer>> countryHits, Map<String, Set<String>> nameCodesMap, String docText, Span[] sentences, Integer maxAllowedDist) {
+    this.nameCodesMap = nameCodesMap;
+    setDominantCode(countryHits);
+    for (LinkedSpan<BaseLink> current : linkedData) {
+      // the scores are written into the span's links in place
+      simpleProximityAnalysis(sentences, countryHits, current, maxAllowedDist);
+    }
+    return linkedData;
+  }
+
+  /**
+   * Sets the class level variable {@code dominantCode} to the country code
+   * with the most mentions. When there are no mentions at all the code is
+   * reset to the empty string, so that a value left over from a previously
+   * scored document cannot linger.
+   *
+   * @param countryHits country code mapped to the offsets of its mentions
+   */
+  private void setDominantCode(Map<String, Set<Integer>> countryHits) {
+    dominantCode = "";
+    int hits = -1;
+    for (Map.Entry<String, Set<Integer>> mentions : countryHits.entrySet()) {
+      int count = mentions.getValue().size();
+      // strictly greater: on a tie the code seen first stays dominant
+      if (count > hits) {
+        hits = count;
+        dominantCode = mentions.getKey();
+      }
+    }
+  }
+
+  /**
+   * Generates distances from each country mention to the span's location in
+   * the doc text and turns them into a "countrycontext" score on every link of
+   * the span. Ultimately an attempt to ensure that ambiguously named toponyms
+   * are resolved to the correct country and coordinate.
+   * <p>
+   * A link whose country code has no mentions in the document scores 0. A link
+   * whose name is itself a matched country name for that code is boosted by
+   * .75, and by a further .25 when the code is the dominant one; the score is
+   * capped at 1.
+   *
+   * @param sentences          the sentence spans of the document
+   * @param countryHits        country code mapped to the offsets of its
+   *                           mentions
+   * @param span               the linked span whose links are to be scored
+   * @param maxAllowedDistance currently not applied; see the TODO below
+   * @return the span that was passed in
+   */
+  private LinkedSpan<BaseLink> simpleProximityAnalysis(Span[] sentences, Map<String, Set<Integer>> countryHits, LinkedSpan<BaseLink> span, Integer maxAllowedDistance) {
+    //the start of the span's sentence stands in for the position of the span itself
+    //should generate tokens from sentence and create a char offset...
+    //could have large sentences due to poor sentence detection or wonky doc text
+    int sentenceIdx = span.getSentenceid();
+    int sentIndexInDoc = sentences[sentenceIdx].getStart();
+
+    //Map<countrycode, Set<distances this span is from all the mentions of the code>>
+    Map<String, Set<Integer>> distancesFromCodeMap = new HashMap<String, Set<Integer>>();
+    for (Map.Entry<String, Set<Integer>> mentions : countryHits.entrySet()) {
+      String cCode = mentions.getKey();
+      for (Integer cHit : mentions.getValue()) {
+        Integer absDist = Math.abs(sentIndexInDoc - cHit);
+        //TODO only include mentions nearer than maxAllowedDistance, and make that a property
+        Set<Integer> distances = distancesFromCodeMap.get(cCode);
+        if (distances == null) {
+          distances = new HashSet<Integer>();
+          distancesFromCodeMap.put(cCode, distances);
+        }
+        distances.add(absDist);
+      }
+    }
+    //we now know how far this named entity is from every country mention in the document
+
+    //Map<countrycode, likelihood that this span refers to a toponym of that country>
+    Map<String, Double> scoreMap = analyzeMap(distancesFromCodeMap, sentences, span);
+    for (BaseLink link : span.getLinkedEntries()) {
+      //start from zero for every link: with a single variable shared by the
+      //whole loop, a link whose country had no mentions inherited the score of
+      //the link before it
+      Double score = 0.0;
+      //getItemParentId is the country code
+      String spanCountryCode = link.getItemParentID();
+      if (scoreMap.containsKey(spanCountryCode)) {
+        score = scoreMap.get(spanCountryCode);
+        //does the name extracted match a country name, and if so, is it the
+        //correct country code for that name?
+        String itemName = link.getItemName().toLowerCase();
+        if (nameCodesMap.containsKey(itemName) && nameCodesMap.get(itemName).contains(spanCountryCode)) {
+          //boost the score because it is likely that this is the location in the text
+          //TODO: make this multiplier configurable
+          //TODO: improve this with a geographic/geometry based clustering (linear binning to be more precise) of points returned from the gaz
+          score = (score + .75) > 1.0 ? 1d : (score + .75);
+          //boost the score if the hit is from the dominant country context
+          if (spanCountryCode.equals(dominantCode)) {
+            score = (score + .25) > 1.0 ? 1d : (score + .25);
+          }
+        }
+      }
+      link.getScoreMap().put("countrycontext", score);
+    }
+    return span;
+  }
+
+  /**
+   * Takes a map of distances from the named entity to each country mention and
+   * generates a score for each country code. The caller looks the score up by
+   * the country code of a BaseLink and adds it to that link's score map.
+   *
+   * @param distanceMap country code mapped to the distances between the span
+   *                    and the mentions of that code
+   * @param sentences   not used by this implementation
+   * @param span        not used by this implementation
+   * @return country code mapped to its score; empty if {@code distanceMap} is
+   *         empty
+   */
+  private Map<String, Double> analyzeMap(Map<String, Set<Integer>> distanceMap, Span[] sentences, LinkedSpan<BaseLink> span) {
+
+    Map<String, Double> scores = new HashMap<String, Double>();
+    if (distanceMap.isEmpty()) {
+      return scores;
+    }
+    //pool every distance to get min and max for normalization, this could be more efficient
+    TreeSet<Integer> pooled = new TreeSet<Integer>();
+    for (Set<Integer> distances : distanceMap.values()) {
+      pooled.addAll(distances);
+    }
+    Integer nearest = pooled.first();
+    Integer farthest = pooled.last();
+
+    for (Map.Entry<String, Set<Integer>> perCode : distanceMap.entrySet()) {
+      //a TreeSet keeps the values sorted and drops duplicates
+      TreeSet<Double> closeness = new TreeSet<Double>();
+      for (Integer distance : perCode.getValue()) {
+        //reverse the normed distance so that closer mentions score higher
+        //this could be improved with a "decaying" function using an increasing negative exponent
+        closeness.add(Math.abs(normalize(distance, nearest, farthest) - 1));
+      }
+      scores.put(perCode.getKey(), slidingDistanceAverage(new ArrayList<Double>(closeness)));
+    }
+    return scores;
+  }
+
+  /**
+   * Averages the given values pairwise before averaging the result, in an
+   * attempt to let close clusters of mentions smooth out the average, so that
+   * one distant outlier does not kill the score for an obviously good hit.
+   * Lists of fewer than three values are averaged directly. A more elegant
+   * solution is possible using Math.pow, making the score decay with distance
+   * by using an increasing negative exponent.
+   *
+   * @param normDis the normalized and sorted set of distances as a list
+   * @return the average of the averages of each pair of neighbours, or the
+   *         plain average for fewer than three values
+   */
+  private Double slidingDistanceAverage(List<Double> normDis) {
+    int count = normDis.size();
+    double total = 0d;
+    int terms;
+
+    if (count < 3) {
+      //too few values for a window: every value is its own term
+      for (double value : normDis) {
+        total += value;
+      }
+      terms = count;
+    } else {
+      //one term per pair of neighbours
+      for (int right = 1; right < count; right++) {
+        double a = normDis.get(right - 1);
+        double b = normDis.get(right);
+        total += (a + b) / 2;
+      }
+      terms = count - 1;
+    }
+    //TODO: ++ prob when large amounts of mentions for a code
+    double result = total / terms;
+    return result;
+  }
+
+  /**
+   * Transposes a value from the range [minimum, maximum] to the relative value
+   * in the range [0, 1]. Used to normalize distances in this class.
+   * <p>
+   * When minimum and maximum are equal there is no range to place the value
+   * in, and the division would yield NaN; 0 is returned in that case.
+   *
+   * @param valueToNormalize the value to place within the new range
+   * @param minimum          the min of the set to be transposed
+   * @param maximum          the max of the set to be transposed
+   * @return the value's position between minimum and maximum as a number from
+   *         0 to 1, or 0 when minimum equals maximum
+   */
+  private Double normalize(int valueToNormalize, int minimum, int maximum) {
+    int range = maximum - minimum;
+    if (range == 0) {
+      // a floating point 0 / 0 is NaN, not null, so this has to be checked
+      // before dividing
+      return 0d;
+    }
+    return (double) (valueToNormalize - minimum) / range;
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/FuzzyStringMatchScorer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/FuzzyStringMatchScorer.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/FuzzyStringMatchScorer.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/FuzzyStringMatchScorer.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import opennlp.tools.entitylinker.domain.BaseLink;
+import opennlp.tools.entitylinker.domain.LinkedSpan;
+import opennlp.tools.ngram.NGramGenerator;
+import opennlp.tools.util.Span;
+
+/**
+ * Scores each candidate link by how closely its item name matches the search
+ * term of the span it belongs to, using two string-similarity measures.
+ */
+public class FuzzyStringMatchScorer implements LinkedEntityScorer<CountryContext> {
+
+  /**
+   * Adds a {@code "dice"} and a {@code "levenshtein"} entry to the score map of
+   * every link of every span. Both strings are lower-cased and stripped of
+   * spaces before they are compared.
+   *
+   * @param linkedSpans       the spans whose links are scored in place
+   * @param docText           not used by this scorer
+   * @param sentenceSpans     not used by this scorer
+   * @param additionalContext not used by this scorer
+   */
+  @Override
+  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, CountryContext additionalContext) {
+    for (LinkedSpan<BaseLink> linkedSpan : linkedSpans) {
+      // the search term is shared by all links of the span, so clean it once
+      String term = squash(linkedSpan.getSearchTerm());
+      for (BaseLink link : linkedSpan.getLinkedEntries()) {
+        String name = squash(link.getItemName());
+        link.getScoreMap().put("dice", getDiceCoefficient(term, name, 2));
+        link.getScoreMap().put("levenshtein", (double) getLevenshteinDistance(term, name));
+      }
+    }
+  }
+
+  /**
+   * Lower-cases a string and removes its spaces; {@code null} becomes the empty
+   * string so that a missing name scores as "no match" instead of failing.
+   */
+  private static String squash(String text) {
+    return text == null ? "" : text.toLowerCase().replace(" ", "");
+  }
+
+  /**
+   * Generates a score based on an overlap of nGrams between two strings using
+   * the DiceCoefficient technique.
+   *
+   * @param s1     first string
+   * @param s2     second string
+   * @param nGrams number of chars in each gram
+   * @return twice the number of distinct shared grams divided by the total
+   *         number of grams of both strings; {@code 0} when either string is
+   *         empty or when neither string produced any gram
+   */
+  public double getDiceCoefficient(String s1, String s2, int nGrams) {
+    // the second operand used to test s1 again, so an empty s2 was never caught
+    if (s1.equals("") || s2.equals("")) {
+      return 0d;
+    }
+    List<String> s1Grams = NGramGenerator.generate(s1.toCharArray(), nGrams, "");
+    List<String> s2Grams = NGramGenerator.generate(s2.toCharArray(), nGrams, "");
+
+    int totalGrams = s1Grams.size() + s2Grams.size();
+    if (totalGrams == 0) {
+      // avoid 0 / 0, which would make the score NaN
+      return 0d;
+    }
+
+    Set<String> overlap = new HashSet<String>(s1Grams);
+    overlap.retainAll(s2Grams);
+
+    return (2d * overlap.size()) / totalGrams;
+  }
+
+  private int minimum(int a, int b, int c) {
+    return Math.min(Math.min(a, b), c);
+  }
+
+  /**
+   * Computes the Levenshtein (edit) distance between two character sequences
+   * with the classic dynamic-programming table.
+   *
+   * @param str1 first sequence
+   * @param str2 second sequence
+   * @return the minimum number of single-character insertions, deletions and
+   *         substitutions that turn {@code str1} into {@code str2}
+   */
+  public int getLevenshteinDistance(CharSequence str1,
+          CharSequence str2) {
+    int rows = str1.length();
+    int cols = str2.length();
+    int[][] distance = new int[rows + 1][cols + 1];
+
+    // first column / first row: distance from or to the empty prefix
+    for (int i = 0; i <= rows; i++) {
+      distance[i][0] = i;
+    }
+    for (int j = 1; j <= cols; j++) {
+      distance[0][j] = j;
+    }
+
+    for (int i = 1; i <= rows; i++) {
+      for (int j = 1; j <= cols; j++) {
+        int substitution = (str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : 1;
+        distance[i][j] = minimum(
+                distance[i - 1][j] + 1,
+                distance[i][j - 1] + 1,
+                distance[i - 1][j - 1] + substitution);
+      }
+    }
+
+    return distance[rows][cols];
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerEntry.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerEntry.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerEntry.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerEntry.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.HashMap;
+import java.util.Map;
+import opennlp.tools.entitylinker.domain.BaseLink;
+
+/**
+ * A single record of a geographic place-name gazetteer: a {@link BaseLink}
+ * extended with coordinates, the name of the gazetteer it came from, its id in
+ * the index and the raw index fields.
+ */
+public class GazateerEntry extends BaseLink {
+
+  private String source;
+  private String indexID;
+  private Double latitude;
+  private Double longitude;
+  private Map<String, String> indexData = new HashMap<>();
+
+  /** @return the name of the gazetteer this record came from */
+  public String getSource() {
+    return source;
+  }
+
+  /** @param source the name of the gazetteer this record came from */
+  public void setSource(String source) {
+    this.source = source;
+  }
+
+  /** @return the id of this record in the index */
+  public String getIndexID() {
+    return indexID;
+  }
+
+  /** @param indexID the id of this record in the index */
+  public void setIndexID(String indexID) {
+    this.indexID = indexID;
+  }
+
+  /** @return the latitude, or {@code null} if none was set */
+  public Double getLatitude() {
+    return latitude;
+  }
+
+  /** @param latitude the latitude of the place */
+  public void setLatitude(Double latitude) {
+    this.latitude = latitude;
+  }
+
+  /** @return the longitude, or {@code null} if none was set */
+  public Double getLongitude() {
+    return longitude;
+  }
+
+  /** @param longitude the longitude of the place */
+  public void setLongitude(Double longitude) {
+    this.longitude = longitude;
+  }
+
+  /** @return the field-name to value map of this record; empty until filled or replaced */
+  public Map<String, String> getIndexData() {
+    return indexData;
+  }
+
+  /** @param indexData the field-name to value map that replaces the current one */
+  public void setIndexData(Map<String, String> indexData) {
+    this.indexData = indexData;
+  }
+
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,96 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.Version;
+
+/**
+ * Builds a Lucene index from a pipe-delimited gazetteer file whose first line
+ * names the columns. Every following line becomes one document with one stored
+ * text field per column.
+ */
+public class GazateerIndexer {
+
+  /**
+   * The supported gazetteers. {@link #toString()} returns the name (with a
+   * leading slash) of the sub-directory the gazetteer is indexed into.
+   */
+  public enum GazType {
+
+    GEONAMES {
+      @Override
+      public String toString() {
+        // used to return the USGS directory name, which made both gazetteers
+        // share a single index directory
+        return "/opennlp_geoentitylinker_geonames_idx";
+      }
+    },
+    USGS {
+      @Override
+      public String toString() {
+        return "/opennlp_geoentitylinker_usgsgaz_idx";
+      }
+    }
+  }
+
+  /**
+   * Indexes a gazetteer file into a sub-directory of {@code outputIndexDir}.
+   *
+   * @param outputIndexDir    existing directory that receives the index
+   *                          sub-directory
+   * @param gazateerInputData the pipe-delimited gazetteer file
+   * @param type              selects the name of the index sub-directory
+   * @throws IllegalArgumentException if {@code outputIndexDir} is not a directory
+   * @throws Exception                if reading the file or writing the index fails
+   */
+  public void index(File outputIndexDir, File gazateerInputData, GazType type) throws Exception {
+    if (!outputIndexDir.isDirectory()) {
+      throw new IllegalArgumentException("outputIndexDir must be a directory.");
+    }
+
+    String indexloc = outputIndexDir + type.toString();
+    Analyzer a = new StandardAnalyzer(Version.LUCENE_45);
+    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, a);
+
+    Directory index = new MMapDirectory(new File(indexloc));
+    try {
+      IndexWriter w = new IndexWriter(index, config);
+      try {
+        readFile(gazateerInputData, w);
+        w.commit();
+      } finally {
+        // release the writer even when reading or indexing fails
+        w.close();
+      }
+    } finally {
+      index.close();
+    }
+  }
+
+  /**
+   * Reads the gazetteer file line by line and adds one document per data line
+   * to the writer, committing after every 10000 lines.
+   *
+   * @param gazateerInputData the pipe-delimited gazetteer file; its first
+   *                          non-empty line holds the column names
+   * @param w                 the writer that receives the documents
+   * @throws Exception if reading the file or writing the index fails
+   */
+  public void readFile(File gazateerInputData, IndexWriter w) throws Exception {
+    BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData));
+    try {
+      List<String> fields = new ArrayList<String>();
+      int counter = 0;
+      System.out.println("reading gazateer data from file...........");
+      String line;
+      // readLine() is the only read: the former reader.read() end-of-file test
+      // consumed the first character of every line
+      while ((line = reader.readLine()) != null) {
+        if (line.isEmpty()) {
+          continue;
+        }
+        String[] values = line.split("\\|");//nga format
+        if (counter == 0) {
+          // build fields
+          for (String columnName : values) {
+            fields.add(stripByteOrderMark(columnName));
+          }
+        } else {
+          Document doc = new Document();
+          // NOTE(review): the last header column is never indexed (bound is
+          // fields.size() - 1, as before) - confirm that this is intended.
+          // split() drops trailing empty values, so also stop at the row length.
+          int columns = Math.min(fields.size() - 1, values.length);
+          for (int i = 0; i < columns; i++) {
+            doc.add(new TextField(fields.get(i), values[i], Field.Store.YES));
+          }
+          w.addDocument(doc);
+        }
+        counter++;
+        if (counter % 10000 == 0) {
+          w.commit();
+          System.out.println(counter + " .........committed to index..............");
+        }
+      }
+    } finally {
+      reader.close();
+    }
+  }
+
+  /**
+   * Removes a leading byte order mark from a column name, whether it was
+   * decoded as one character or as the three single-byte characters of its
+   * UTF-8 form; the two-character remainder the old code stripped is still
+   * removed as well.
+   */
+  private static String stripByteOrderMark(String columnName) {
+    return columnName
+            .replace("\uFEFF", "")
+            .replace("\u00EF\u00BB\u00BF", "")
+            .replace("»¿", "");
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queryparser.classic.ParseException;
+
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.Version;
+import opennlp.tools.entitylinker.EntityLinkerProperties;
+/**
+ * Searches gazetteers stored in MMapDirectory Lucene indexes. The index of each
+ * gazetteer is opened on the first search against it, from the location named
+ * in the supplied {@link EntityLinkerProperties}, and is reused afterwards.
+ */
+public class GazateerSearcher {
+
+  private double scoreCutoff = .75;
+  private Directory geonamesIndex;
+  private IndexReader geonamesReader;
+  private IndexSearcher geonamesSearcher;
+  private Analyzer geonamesAnalyzer;
+  //usgs US gazateer
+  private Directory usgsIndex;
+  private IndexReader usgsReader;
+  private IndexSearcher usgsSearcher;
+  private Analyzer usgsAnalyzer;
+
+  public GazateerSearcher() {
+  }
+
+  /**
+   * Searches the geonames index for a place name within one country.
+   *
+   * @param searchString the place name to look for; query syntax characters in
+   *                     it are escaped
+   * @param rowsReturned the maximum number of hits requested from the index
+   * @param code         the country code; only hits whose item parent id
+   *                     equals it (ignoring case) are kept
+   * @param properties   supplies the index location
+   *                     ({@code opennlp.geoentitylinker.gaz.geonames}) and the
+   *                     minimum normalized score
+   *                     ({@code opennlp.geoentitylinker.gaz.lucenescore.min})
+   * @return the matching entries whose normalized score reaches the cutoff;
+   *         empty when the search fails with an I/O or query parse error
+   */
+  public ArrayList<GazateerEntry> geonamesFind(String searchString, int rowsReturned, String code, EntityLinkerProperties properties) {
+    ArrayList<GazateerEntry> linkedData = new ArrayList<>();
+    try {
+      // test the searcher, which is assigned last: if opening the index fails
+      // half way, the next call tries again instead of using a null searcher
+      if (geonamesSearcher == null) {
+        String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.geonames", "");
+        scoreCutoff = readScoreCutoff(properties);
+        geonamesIndex = new MMapDirectory(new File(indexloc));
+        geonamesReader = DirectoryReader.open(geonamesIndex);
+        geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
+        geonamesSearcher = new IndexSearcher(geonamesReader);
+      }
+
+      // NOTE(review): a single '&' is not an operator of the classic query
+      // syntax ('&&' and 'AND' are) - confirm whether the country clause was
+      // meant to be mandatory. The country filter below is applied to the
+      // results either way.
+      String luceneQueryString = "FULL_NAME_ND_RO:" + QueryParser.escape(searchString) + " & CC1:" + code.toUpperCase();
+      // the second constructor argument is the default field for terms without
+      // a field prefix (e.g. the second word of a name), not the query text
+      QueryParser parser = new QueryParser(Version.LUCENE_45, "FULL_NAME_ND_RO", geonamesAnalyzer);
+      Query q = parser.parse(luceneQueryString);
+
+      TopDocs search = geonamesSearcher.search(q, rowsReturned);
+      double maxScore = (double) search.getMaxScore();
+
+      for (int i = 0; i < search.scoreDocs.length; ++i) {
+        int docId = search.scoreDocs[i].doc;
+        GazateerEntry entry = newEntry(docId, search.scoreDocs[i].score, "geonames");
+
+        Document d = geonamesSearcher.doc(docId);
+        List<IndexableField> fields = d.getFields();
+        // the stored fields are addressed by position
+        for (int idx = 0; idx < fields.size(); idx++) {
+          String fieldName = fields.get(idx).name();
+          String value = d.get(fieldName);
+          value = value.toLowerCase();
+          switch (idx) {
+            case 1:
+              entry.setItemID(value);
+              break;
+            case 3:
+              entry.setLatitude(Double.valueOf(value));
+              break;
+            case 4:
+              entry.setLongitude(Double.valueOf(value));
+              break;
+            case 10:
+              entry.setItemType(value);
+              break;
+            case 12:
+              entry.setItemParentID(value);
+              break;
+            case 23:
+              entry.setItemName(value);
+              break;
+          }
+          entry.getIndexData().put(fieldName, value);
+        }
+        //only keep it if the country code is a match; a document with too few
+        //fields has no parent id and is dropped rather than causing a failure
+        String parentId = entry.getItemParentID();
+        if (parentId != null && parentId.toLowerCase().equals(code.toLowerCase())) {
+          linkedData.add(entry);
+        }
+      }
+
+      normalize(linkedData, 0d, maxScore);
+      prune(linkedData);
+    } catch (IOException | ParseException ex) {
+      System.err.println(ex);
+    }
+    return linkedData;
+  }
+
+  /**
+   * Searches the USGS index for a place name. Every hit gets the item parent
+   * id {@code "us"}.
+   *
+   * @param searchString the place name to look for; query syntax characters in
+   *                     it are escaped
+   * @param rowsReturned the maximum number of hits requested from the index
+   * @param properties   supplies the index location
+   *                     ({@code opennlp.geoentitylinker.gaz.usgs}) and the
+   *                     minimum normalized score
+   *                     ({@code opennlp.geoentitylinker.gaz.lucenescore.min})
+   * @return the matching entries whose normalized score reaches the cutoff;
+   *         empty when the search fails with an I/O or query parse error
+   */
+  public ArrayList<GazateerEntry> usgsFind(String searchString, int rowsReturned, EntityLinkerProperties properties) {
+    ArrayList<GazateerEntry> linkedData = new ArrayList<>();
+    try {
+      // see geonamesFind: the searcher is the last thing assigned
+      if (usgsSearcher == null) {
+        String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.usgs", "");
+        scoreCutoff = readScoreCutoff(properties);
+        usgsIndex = new MMapDirectory(new File(indexloc));
+        usgsReader = DirectoryReader.open(usgsIndex);
+        usgsAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
+        usgsSearcher = new IndexSearcher(usgsReader);
+      }
+
+      String term = QueryParser.escape(searchString);
+      String luceneQueryString = "FEATURE_NAME:" + term + " OR MAP_NAME: " + term;
+      // default field for unprefixed terms, not the query text
+      QueryParser parser = new QueryParser(Version.LUCENE_45, "FEATURE_NAME", usgsAnalyzer);
+      Query q = parser.parse(luceneQueryString);
+
+      TopDocs search = usgsSearcher.search(q, rowsReturned);
+      double maxScore = (double) search.getMaxScore();
+
+      for (int i = 0; i < search.scoreDocs.length; ++i) {
+        int docId = search.scoreDocs[i].doc;
+        GazateerEntry entry = newEntry(docId, search.scoreDocs[i].score, "usgs");
+        entry.setItemParentID("us");
+
+        Document d = usgsSearcher.doc(docId);
+        List<IndexableField> fields = d.getFields();
+        // the stored fields are addressed by position
+        for (int idx = 0; idx < fields.size(); idx++) {
+          String fieldName = fields.get(idx).name();
+          String value = d.get(fieldName);
+          value = value.toLowerCase();
+          switch (idx) {
+            case 0:
+              entry.setItemID(value);
+              break;
+            case 1:
+              entry.setItemName(value);
+              break;
+            case 2:
+              entry.setItemType(value);
+              break;
+            case 9:
+              entry.setLatitude(Double.valueOf(value));
+              break;
+            case 10:
+              entry.setLongitude(Double.valueOf(value));
+              break;
+          }
+          entry.getIndexData().put(fieldName, value);
+        }
+        linkedData.add(entry);
+      }
+
+      normalize(linkedData, 0d, maxScore);
+      prune(linkedData);
+    } catch (IOException | ParseException ex) {
+      System.err.println(ex);
+    }
+
+    return linkedData;
+  }
+
+  /**
+   * Reads the minimum normalized score an entry needs to survive pruning.
+   */
+  private static double readScoreCutoff(EntityLinkerProperties properties) throws IOException {
+    String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", ".75");
+    return Double.valueOf(cutoff);
+  }
+
+  /**
+   * Creates an entry for a hit, recording the hit score under both
+   * {@code "lucene"} (normalized later) and {@code "rawlucene"}.
+   */
+  private static GazateerEntry newEntry(int docId, double score, String source) {
+    GazateerEntry entry = new GazateerEntry();
+    entry.getScoreMap().put("lucene", score);
+    entry.getScoreMap().put("rawlucene", score);
+    entry.setIndexID(docId + "");
+    entry.setSource(source);
+    return entry;
+  }
+
+  /**
+   * Rescales the {@code "lucene"} score of every entry into the range given by
+   * {@code minScore} and {@code maxScore}, capping it at 1.0. A score that
+   * cannot be computed (NaN) is replaced by 0.001.
+   */
+  private void normalize(ArrayList<GazateerEntry> linkedData, Double minScore, Double maxScore) {
+    for (GazateerEntry gazateerEntry : linkedData) {
+      double luceneScore = normalize(gazateerEntry.getScoreMap().get("lucene"), minScore, maxScore);
+      // "x == Double.NaN" is false for every x, so NaN has to be detected with
+      // Double.isNaN
+      if (Double.isNaN(luceneScore)) {
+        luceneScore = 0.001;
+      } else if (luceneScore > 1.0) {
+        luceneScore = 1.0;
+      }
+      gazateerEntry.getScoreMap().put("lucene", luceneScore);
+    }
+  }
+
+  /**
+   * Removes every entry whose normalized {@code "lucene"} score is below the
+   * configured cutoff.
+   */
+  private void prune(ArrayList<GazateerEntry> linkedData) {
+    for (Iterator<GazateerEntry> itr = linkedData.iterator(); itr.hasNext();) {
+      GazateerEntry ge = itr.next();
+      if (ge.getScoreMap().get("lucene") < scoreCutoff) {
+        itr.remove();
+      }
+    }
+  }
+
+  /**
+   * Places a value relative to a range.
+   *
+   * @return {@code (valueToNormalize - minimum) / (maximum - minimum)}; NaN or
+   *         infinite when the range has no width
+   */
+  private Double normalize(Double valueToNormalize, Double minimum, Double maximum) {
+    return (valueToNormalize - minimum) / (maximum - minimum);
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import opennlp.tools.entitylinker.domain.BaseLink;
+import opennlp.tools.entitylinker.domain.LinkedSpan;
+import opennlp.tools.util.Span;
+import opennlp.tools.entitylinker.EntityLinkerProperties;
+import opennlp.tools.entitylinker.EntityLinker;
+/**
+ * Links location entities to gazatteers. Currently supports gazateers in a
+ * MySql database (NGA and USGS)
+ *
+ *
+ */
+public class GeoEntityLinker implements EntityLinker<LinkedSpan> {
+
+  // CountryProximityScorer scorer = new CountryProximityScorer();
+//  private MySQLGeoNamesGazLinkable geoNamesGaz;// = new MySQLGeoNamesGazLinkable();
+//  private MySQLUSGSGazLinkable usgsGaz;//= new MySQLUSGSGazLinkable();
+  private CountryContext countryContext;
+  private Map<String, Set<Integer>> countryMentions;
+  private EntityLinkerProperties linkerProperties;
+  private GazateerSearcher gazateerSearcher = new GazateerSearcher();
+  /**
+   * Flag for deciding whether to search gaz only for toponyms within countries
+   * that are mentioned in the document
+   */
+  private Boolean filterCountryContext = true;
+
+  public GeoEntityLinker() {
+    countryContext = new CountryContext();
+  }
+
+  @Override
+  public List<LinkedSpan> find(String doctext, Span[] sentences, String[][] tokensBySentence, Span[][] namesBySentence) {
+    ArrayList<LinkedSpan> spans = new ArrayList<LinkedSpan>();
+
+    if (linkerProperties == null) {
+      throw new IllegalArgumentException("EntityLinkerProperties cannot be null");
+    }
+    countryMentions = countryContext.regexfind(doctext, linkerProperties);
+
+    for (int s = 0; s < sentences.length; s++) {
+      Span[] names = namesBySentence[s];
+      String[] tokens = tokensBySentence[s];
+      String[] matches = Span.spansToStrings(names, tokens);
+
+      for (int i = 0; i < matches.length; i++) {
+
+//nga gazateer is for other than US placenames, don't use it unless US is a mention in the document
+        ArrayList<BaseLink> geoNamesEntries = new ArrayList<BaseLink>();
+        if (!(countryMentions.keySet().contains("us") && countryMentions.keySet().size() == 1) || countryMentions.keySet().size() > 1 || countryMentions.keySet().isEmpty()) {
+          // geoNamesEntries = geoNamesGaz.find(matches[i], names[i], countryMentions, linkerProperties);
+          for (String code : countryMentions.keySet()) {
+            if (!code.equals("us")) {
+              geoNamesEntries.addAll(gazateerSearcher.geonamesFind(matches[i], 5, code, linkerProperties));
+            }
+          }
+
+        }
+        ArrayList<BaseLink> usgsEntries = new ArrayList<BaseLink>();
+        if (countryMentions.keySet().contains("us") || countryMentions.keySet().isEmpty()) {
+          //usgsEntries = usgsGaz.find(matches[i], names[i], linkerProperties);
+          usgsEntries.addAll(gazateerSearcher.usgsFind(matches[i], 3, linkerProperties));
+        }
+        LinkedSpan<BaseLink> geoSpan = new LinkedSpan<BaseLink>(geoNamesEntries, names[i].getStart(), names[i].getEnd());
+
+        if (!usgsEntries.isEmpty()) {
+          geoSpan.getLinkedEntries().addAll(usgsEntries);
+          geoSpan.setSearchTerm(matches[i]);
+        }
+
+        if (!geoSpan.getLinkedEntries().isEmpty()) {
+          geoSpan.setSearchTerm(matches[i]);
+          geoSpan.setSentenceid(s);
+          spans.add(geoSpan);
+        }
+      }
+    }
+
+    List<LinkedEntityScorer<CountryContext>> scorers = new ArrayList<>();
+    scorers.add(new FuzzyStringMatchScorer());
+    scorers.add(new GeoHashBinningScorer());
+    scorers.add(new CountryProximityScorer());
+
+    for (LinkedEntityScorer scorer : scorers) {
+      scorer.score(spans, doctext, sentences, countryContext);
+    }
+    return spans;
+  }
+
+  @Override
+  public void setEntityLinkerProperties(EntityLinkerProperties properties) {
+    this.linkerProperties = properties;
+  }
+
+  @Override
+  public List<LinkedSpan> find(String text, Span[] sentences, Span[] tokens, Span[] nameSpans) {
+    throw new UnsupportedOperationException("The GeoEntityLinker requires the entire document for proper scoring. This method is unsupported"); //To change body of generated methods, choose Tools | Templates.
+  }
+
+  @Override
+  public List<LinkedSpan> find(String text, Span[] sentences, Span[] tokens, Span[] nameSpans, int sentenceIndex) {
+    throw new UnsupportedOperationException("The GeoEntityLinker requires the entire document for proper scoring. This method is unsupported"); //To change body of generated methods, choose Tools | Templates.
+  }
+
+  @Override
+  public List<LinkedSpan> find(String text, Span[] sentences, String[] tokens, Span[] nameSpans) {
+    throw new UnsupportedOperationException("The GeoEntityLinker requires the entire document for proper scoring. This method is unsupported"); //To change body of generated methods, choose Tools | Templates.
+  }
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoHashBinningScorer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoHashBinningScorer.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoHashBinningScorer.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoHashBinningScorer.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import opennlp.tools.entitylinker.domain.BaseLink;
+import opennlp.tools.entitylinker.domain.LinkedSpan;
+import opennlp.tools.util.Span;
+
+/**
+ * Scores toponyms based on geographic point binning (clustering). This class's output is highly dependent on the quality
+ * of the points returned from the gazetteer. False positive hits from the index will pollute this result. Ensure the score cutoff for the
+ * Lucene search is set to an appropriate level so that this class is not fed poor data.
+ */
+public class GeoHashBinningScorer implements LinkedEntityScorer<CountryContext> {
+
+  /**
+   * Scores the links of the given spans. Only {@code linkedSpans} is used; the
+   * document text, the sentence spans and the context are ignored.
+   */
+  @Override
+  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, CountryContext additionalContext) {
+    this.score(linkedSpans);
+  }
+
+  /**
+   * Hashes the coordinates of every gazetteer entry, bins the hashes, and
+   * stores the score of the bin an entry falls into in the entry's score map
+   * under {@code "geohashbin"}. Links that are not gazetteer entries, or that
+   * have no coordinates, get a score of 0.
+   *
+   * @param geospans the spans whose links are scored in place
+   */
+  private  void score(List<LinkedSpan> geospans) {
+    // Collect the hash of every point. The points used to be gathered in a map
+    // keyed by latitude first, which silently dropped all but one of the
+    // points that share a latitude.
+    TreeSet<Long> geoHashes = new TreeSet<Long>();
+    for (LinkedSpan<BaseLink> ls : geospans) {
+      for (BaseLink bl : ls.getLinkedEntries()) {
+        Long hash = geoHashOf(bl);
+        if (hash != null) {
+          geoHashes.add(hash);
+        }
+      }
+    }
+
+    // bin the points and generate a scoremap
+    Map<Long, Set<Long>> bins = bin(geoHashes);
+    Map<Long, Double> scores = getScore((TreeMap<Long, Set<Long>>) bins);
+
+    // iterate over the data again and assign the score based on the bins
+    for (LinkedSpan<BaseLink> ls : geospans) {
+      for (BaseLink bl : ls.getLinkedEntries()) {
+        Double score = 0d;
+        Long geohash = geoHashOf(bl);
+        if (geohash != null) {
+          if (scores.containsKey(geohash)) {
+            score = scores.get(geohash);
+          } else {
+            for (Map.Entry<Long, Set<Long>> bin : bins.entrySet()) {
+              // equals(), not ==: == on two Long objects compares references
+              if (bin.getKey().equals(geohash) || bin.getValue().contains(geohash)) {
+                score = scores.get(bin.getKey());
+                break;
+              }
+            }
+          }
+        }
+        // never store a null score
+        bl.getScoreMap().put("geohashbin", score == null ? 0d : score);
+      }
+    }
+  }
+
+  /**
+   * Returns the geohash of a link, or {@code null} when the link is not a
+   * gazetteer entry or lacks a latitude or longitude.
+   */
+  private Long geoHashOf(BaseLink link) {
+    if (link instanceof GazateerEntry) {
+      GazateerEntry entry = (GazateerEntry) link;
+      if (entry.getLatitude() != null && entry.getLongitude() != null) {
+        return geoHash(entry.getLatitude(), entry.getLongitude());
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Turns one coordinate part into an 8-digit number whose digits can be
+   * compared position by position with those of any other coordinate part.
+   * <p>
+   * The value is shifted to be non-negative (latitude + 90, longitude + 180),
+   * scaled to five decimal places and offset by 10000000, so the result always
+   * lies between 10000000 and 46000000 and grows with the coordinate.
+   * <p>
+   * The previous implementation padded short numbers with zeros on the right
+   * and cut long ones off after 8 characters. That mixed scales (a shifted
+   * value of 5.0 and one of 50.0 both became 50000000, and 190.0 became the
+   * same number as 19.0) and returned 0, a one-digit number, for a shifted
+   * value of 0.
+   *
+   * @param coordpart the latitude or longitude
+   * @param isLat     true if {@code coordpart} is a latitude, false if it is a
+   *                  longitude
+   * @return the 8-digit representation of the coordinate part
+   */
+  private Long normalize(Double coordpart, Boolean isLat) {
+    int shift = isLat ? 90 : 180;
+    // cap at 360 so that the scaled value can never need a ninth digit
+    double shifted = Math.min(Math.abs(coordpart + shift), 360d);
+    return 10000000L + Math.round(shifted * 100000);
+  }
+
+  /**
+   * Interleaves the digits of a normalized latitude and a normalized longitude
+   * (latitude digit first) to place the coordinate in linear sortable space
+   * for binning simplicity. Digits are paired up to the length of the shorter
+   * number, and the last such pair is left out.
+   *
+   * @param lat the latitude
+   * @param lon the longitude
+   * @return the interleaved digits as a number
+   */
+  private Long geoHash(double lat, double lon) {
+    String latDigits = String.valueOf(normalize(lat, Boolean.TRUE));
+    String lonDigits = String.valueOf(normalize(lon, Boolean.FALSE));
+
+    int pairs = Math.min(latDigits.length(), lonDigits.length()) - 1;
+    StringBuilder interleaved = new StringBuilder();
+    for (int position = 0; position < pairs; position++) {
+      interleaved.append(latDigits.charAt(position));
+      interleaved.append(lonDigits.charAt(position));
+    }
+
+    return Long.valueOf(interleaved.toString());
+  }
+
+  /**
+   * Splits a sorted set of hashes into bins of neighbouring values. A bin ends
+   * wherever the gap to the next hash is larger than the average gap.
+   *
+   * @param sets the hashes to bin
+   * @return a sorted map with one entry per bin. Every bin but the last is
+   *         keyed by its largest hash, the last one by its smallest. The
+   *         values are views of {@code sets}.
+   */
+  private Map<Long, Set<Long>> bin(TreeSet<Long> sets) {
+    List<Long> ordered = new ArrayList<Long>(sets);
+    int gaps = ordered.size() - 1;
+
+    // average "distance" between neighbouring hashes (integer division);
+    // it stays 0 when there are fewer than two hashes
+    long averageGap = 0L;
+    for (int i = 0; i < gaps; i++) {
+      averageGap += Math.abs(ordered.get(i + 1) - ordered.get(i));
+    }
+    if (gaps > 0) {
+      averageGap /= gaps;
+    }
+
+    // walk the hashes in ascending order and close a bin at every hash that
+    // is followed by a gap larger than the average
+    TreeMap<Long, Set<Long>> binToAmount = new TreeMap<Long, Set<Long>>();
+    Long lastBreak = -1L;
+    for (int i = 0; i < gaps; i++) {
+      Long current = ordered.get(i);
+      if (ordered.get(i + 1) - current > averageGap) {
+        Set<Long> members = (lastBreak == -1L)
+                ? sets.subSet(0L, true, current, true)
+                : sets.subSet(lastBreak, false, current, true);
+        binToAmount.put(current, members);
+        lastBreak = current;
+      }
+    }
+
+    // whatever lies above the last break forms the final bin; that bin always
+    // contains at least the hash it is keyed by
+    Long tailStart = sets.higher(lastBreak);
+    if (tailStart != null) {
+      binToAmount.put(tailStart, sets.subSet(tailStart, true, sets.last(), true));
+    }
+    return binToAmount;
+  }
+
+  /**
+   * Returns a map of bin keys (geohashes) and their score.
+   *
+   * @param binToAmount the bins of geohashes to score
+   * @return a map from geohash to score
+   */
+  private Map<Long, Double> getScore(TreeMap<Long, Set<Long>> binToAmount) {
+    TreeMap<Long, Double> ranks = new TreeMap<Long, Double>();
+    /**
+     * with at most one bin there is nothing to compare: the bin key gets the
+     * score 1, provided the bin holds at least one point
+     */
+    if (binToAmount.size() <= 1) {
+      for (Map.Entry<Long, Set<Long>> only : binToAmount.entrySet()) {
+        if (!only.getValue().isEmpty()) {
+          ranks.put(only.getKey(), 1d);
+        }
+      }
+      return ranks;
+    }
+    /**
+     * count the points across all bins
+     */
+    int total = 0;
+    for (Set<Long> members : binToAmount.values()) {
+      total += members.size();
+    }
+    /**
+     * raw rank of a bin = total / bin size, so the fullest bin has the lowest
+     * raw rank; rankSet keeps the distinct ranks sorted to expose min and max
+     * TODO... do an extra iteration of clustering within the predominant
+     * cluster to refine the scoring or make the basis of the binning more
+     * granular than > avg
+     */
+    TreeSet<Double> rankSet = new TreeSet<Double>();
+    for (Map.Entry<Long, Set<Long>> bin : binToAmount.entrySet()) {
+      Double rank = (double) total / bin.getValue().size();
+      rankSet.add(rank);
+      ranks.put(bin.getKey(), rank);
+    }
+    /**
+     * rescale against [min + .1, max + .1], flip so that a low raw rank turns
+     * into a high score, and cap the result at 1
+     */
+    double low = rankSet.first() + .1;
+    double high = rankSet.last() + .1;
+    TreeMap<Long, Double> normRanks = new TreeMap<Long, Double>();
+    for (Map.Entry<Long, Double> rank : ranks.entrySet()) {
+      double flipped = Math.abs(normalize(rank.getValue(), low, high) - 1);
+      normRanks.put(rank.getKey(), flipped > 1d ? 1.0 : flipped);
+    }
+    return normRanks;
+  }
+
+  /**
+   * Transposes a number in a range to a double between 0 and 1.
+   *
+   * @param valueToNormalize the value to be normalized (placed within a new
+   *                         range of 0-1)
+   * @param minimum          the min of the current range
+   * @param maximum          the max of the current range
+   * @return the position of the value between minimum and maximum
+   */
+  private Double normalize(Double valueToNormalize, Double minimum, Double maximum) {
+    // Linear rescale onto [0, 1]: 1d is the width and 0d the start of the target
+    // range. Not clamped; minimum == maximum yields an infinite or NaN result.
+    return 1d * (valueToNormalize - minimum) / (maximum - minimum) + 0d;
+  }
+}
+

Added: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/LinkedEntityScorer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/LinkedEntityScorer.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/LinkedEntityScorer.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/LinkedEntityScorer.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.List;
+import opennlp.tools.entitylinker.domain.LinkedSpan;
+import opennlp.tools.util.Span;
+
+/**
+ * Structure for scoring linked entities. The score map of each link logically
+ * represents pairs of "score type" to "actual score".
+ */
+public interface LinkedEntityScorer<T> {
+
+  /**
+   * Scores a collection of linked entities. Implementations should populate
+   * the scoreMap in the list of BaseLink for each linkedSpan.
+   * @param linkedSpans the spans that have been linked to some external source and have all the data they need to be scored
+   * @param docText the full text of the document.
+   * @param sentenceSpans the sentence spans that correspond to the document text
+   * @param additionalContext any additional data required to perform the scoring operation
+   */
+  void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, T additionalContext);
+}

Added: opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/AppTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/AppTest.java?rev=1539319&view=auto
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/AppTest.java (added)
+++ opennlp/sandbox/apache-opennlp-addons/src/test/java/apache/opennlp/addons/AppTest.java Wed Nov  6 11:47:37 2013
@@ -0,0 +1,38 @@
+package apache.opennlp.addons;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest extends TestCase {
+
+    /**
+     * Builds a test case that carries the given name.
+     *
+     * @param testName name of the test case
+     */
+    public AppTest(String testName) {
+        super(testName);
+    }
+
+    /**
+     * Wraps this class in a JUnit {@code TestSuite}.
+     *
+     * @return the suite of tests being tested
+     */
+    public static Test suite() {
+        return new TestSuite(AppTest.class);
+    }
+
+    /**
+     * Placeholder test: asserts the constant {@code true}, so it cannot fail.
+     *
+     * It exercises no code from the addons module.
+     */
+    public void testApp() {
+        assertTrue(true);
+    }
+}