You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/02 23:43:41 UTC

svn commit: r1519520 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: entitylinker/ entitylinker/domain/ formats/brat/ ml/

Author: joern
Date: Mon Sep  2 21:43:40 2013
New Revision: 1519520

URL: http://svn.apache.org/r1519520
Log:
OPENNLP-588 GeoEntityLinker does not provide a method for setting the properties file location in order to get the database connection; it is currently hard coded.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.util.ArrayList;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.sql.CallableStatement;
@@ -25,8 +24,12 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
 
+/**
+ * Finds instances of country mentions in a String, typically a document text.
+ * Used to boost or degrade scoring of linked geo entities.
+
+ */
 public class CountryContext {
 
   private Connection con;
@@ -47,7 +50,7 @@ public class CountryContext {
       for (CountryContextEntry entry : countrydata) {
 
         if (docText.contains(entry.getFull_name_nd_ro())) {
-          System.out.println("hit on " + entry.getFull_name_nd_ro());
+          System.out.println("\tFound Country indicator: " + entry.getFull_name_nd_ro());
           CountryContextHit hit = new CountryContextHit(entry.getCc1(), docText.indexOf(entry.getFull_name_nd_ro()), docText.indexOf(entry.getFull_name_nd_ro()+ entry.getFull_name_nd_ro().length()));
           hits.add(hit);
         }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java Mon Sep  2 21:43:40 2013
@@ -13,11 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 /**
- *
+ * Stores a tuple from MySQL that is used to find country mentions in document text.
  *
  */
 public class CountryContextEntry {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java Mon Sep  2 21:43:40 2013
@@ -13,11 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 /**
- *
+ * Stores a "hit" on a country and the start and end of the hit.
 
  */
 public class CountryContextHit {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.util.List;
@@ -33,6 +32,12 @@ import opennlp.tools.util.Span;
 public interface EntityLinker<T extends Span> {
 
   /**
+   * Allows properties to be passed through the EntityLinkerFactory into all implementations dynamically.
+   * @param properties the EntityLinkerProperties object that contains properties needed by the impl
+   */
+  void setEntityLinkerProperties(EntityLinkerProperties properties);
+  
+  /**
    *
    * @param text      the document text to be used as additional context, and to
    *                  derive sentences and tokens String[]

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.io.IOException;
@@ -21,11 +20,10 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import opennlp.tools.entitylinker.GeoEntityLinker;
 
 /**
- * Generates Lists of EntityLinker implementations via
- * properties file configuration
+ * Generates Lists of EntityLinker implementations via properties file
+ * configuration
  *
  */
 public class EntityLinkerFactory {
@@ -35,29 +33,35 @@ public class EntityLinkerFactory {
    * consists of a comma separated list of full class names. The entityType is
    * used to build the key to the properties entry. the entityType will be
    * prefixed with "linker." Therefore, a compliant property entry for location
-   * entity linker types would be:
-   * linker.<yourtype>=<yourclass1,yourclass2>
-   * For example:
+   * entity linker types would be: linker.<yourtype>=<yourclass1,yourclass2> For
+   * example:
    * linker.location=opennlp.tools.entitylinker.GeoEntityLinker,opennlp.tools.entitylinker.GeoEntityLinker2
    *
    *
    * @param entityType the type of entity, the same as what would be returned
    *                   from span.getType()
-   * @param properties the entitylinker properties that contain the configured entitylinkers
-   * @return  
-  
+   * @param properties the entitylinker properties that contain the configured
+   *                   entitylinkers
+   * @return *
    */
   public static synchronized List<EntityLinker> getLinkers(String entityType, EntityLinkerProperties properties) {
     List<EntityLinker> linkers = new ArrayList<EntityLinker>();
     try {
-      String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());    
+      String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
       for (String classname : listoflinkers.split(",")) {
         Class theClass = Class.forName(classname);
         EntityLinker linker = (EntityLinker) theClass.newInstance();
         System.out.println("EntityLinker factory instantiated: " + linker.getClass().getName());
+        linker.setEntityLinkerProperties(properties);
         linkers.add(linker);
       }
-    } catch (Exception ex) {
+    } catch (InstantiationException ex) {
+      Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+    } catch (IllegalAccessException ex) {
+      Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+    } catch (ClassNotFoundException ex) {
+      Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+    } catch (IOException ex) {
       Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
     }
     return linkers;
@@ -65,8 +69,10 @@ public class EntityLinkerFactory {
 
   /**
    *
-   * @param entityTypes the types of entities, i.e person, location, organization
-     * @param properties the entitylinker properties that contain the configured entitylinkers
+   * @param entityTypes the types of entities, i.e person, location,
+   *                    organization
+   * @param properties  the entitylinker properties that contain the configured
+   *                    entitylinkers
    * @return
    */
   public static synchronized List<EntityLinker> getLinkers(String[] entityTypes, EntityLinkerProperties properties) {
@@ -74,14 +80,21 @@ public class EntityLinkerFactory {
 
     for (String entityType : entityTypes) {
       try {
-        String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());   
+        String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
         for (String classname : listoflinkers.split(",")) {
           Class theClass = Class.forName(classname);
           EntityLinker linker = (EntityLinker) theClass.newInstance();
           System.out.println("EntityLinker factory instantiated: " + linker.getClass().getName());
+          linker.setEntityLinkerProperties(properties);
           linkers.add(linker);
         }
-      }  catch (Exception ex) {
+      } catch (InstantiationException ex) {
+        Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+      } catch (IllegalAccessException ex) {
+        Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+      } catch (ClassNotFoundException ex) {
+        Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+      } catch (IOException ex) {
         Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
       }
 
@@ -89,6 +102,4 @@ public class EntityLinkerFactory {
 
     return linkers;
   }
-
-
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.io.File;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.io.File;
@@ -22,28 +21,29 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinker;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
 import opennlp.tools.entitylinker.domain.BaseLink;
 import opennlp.tools.entitylinker.domain.LinkedSpan;
 import opennlp.tools.util.Span;
 
 /**
  * Links location entities to gazatteers.
+ *
+ *
  */
 public class GeoEntityLinker implements EntityLinker<LinkedSpan> {
 
-  MySQLGeoNamesGazLinkable geoNamesGaz;// = new MySQLGeoNamesGazLinkable();
-  MySQLUSGSGazLinkable usgsGaz;//= new MySQLUSGSGazLinkable();
-  CountryContext countryContext;
-  List<CountryContextHit> hits;
-  EntityLinkerProperties props;
+  private MySQLGeoNamesGazLinkable geoNamesGaz;// = new MySQLGeoNamesGazLinkable();
+  private MySQLUSGSGazLinkable usgsGaz;//= new MySQLUSGSGazLinkable();
+  private CountryContext countryContext;
+  private List<CountryContextHit> hits;
+  private EntityLinkerProperties props;
 
   public GeoEntityLinker() {
     if (geoNamesGaz == null || usgsGaz == null) {
       geoNamesGaz = new MySQLGeoNamesGazLinkable();
       usgsGaz = new MySQLUSGSGazLinkable();
       countryContext = new CountryContext();
+
     }
   }
 
@@ -54,15 +54,18 @@ public class GeoEntityLinker implements 
         props = new EntityLinkerProperties(new File("C:\\temp\\opennlpmodels\\entitylinker.properties"));
       }
       if (hits == null) {
+        System.out.println("getting country context");
         hits = countryContext.find(text, props);
       }
-     
+
       String[] matches = Span.spansToStrings(names, tokens);
       for (int i = 0; i < matches.length; i++) {
+        System.out.println("processing match " + i + " of " + matches.length);
         ArrayList<BaseLink> geoNamesEntries = geoNamesGaz.find(matches[i], names[i], hits, props);
         ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
-        LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
+        LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i].getStart(), names[i].getEnd());
         geoSpans.getLinkedEntries().addAll(usgsEntries);
+        geoSpans.setSearchTerm(matches[i]);
         spans.add(geoSpans);
       }
       return spans;
@@ -93,6 +96,7 @@ public class GeoEntityLinker implements 
         ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
         LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
         geoSpans.getLinkedEntries().addAll(usgsEntries);
+        geoSpans.setSearchTerm(matches[i]);
         spans.add(geoSpans);
       }
       return spans;
@@ -110,7 +114,7 @@ public class GeoEntityLinker implements 
         props = new EntityLinkerProperties(new File("C:\\temp\\opennlpmodels\\entitylinker.properties"));
       }
       List<CountryContextHit> hits = countryContext.find(text, props);
-  
+
       Span s = sentences[sentenceIndex];
       String sentence = text.substring(s.getStart(), s.getEnd());
 
@@ -123,6 +127,8 @@ public class GeoEntityLinker implements 
         ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
         LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
         geoSpans.getLinkedEntries().addAll(usgsEntries);
+        geoSpans.setSearchTerm(matches[i]);
+        geoSpans.setSentenceid(sentenceIndex);
         spans.add(geoSpans);
       }
 
@@ -131,4 +137,8 @@ public class GeoEntityLinker implements 
     }
     return spans;
   }
+
+  public void setEntityLinkerProperties(EntityLinkerProperties properties) {
+    this.props = properties;
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java Mon Sep  2 21:43:40 2013
@@ -13,11 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import opennlp.tools.entitylinker.domain.BaseLink;
 
+/**
+ *
+
+ */
 public class MySQLGeoNamesGazEntry extends BaseLink
 {
   ////actual fields returned

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java Mon Sep  2 21:43:40 2013
@@ -1,21 +1,9 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 package opennlp.tools.entitylinker;
 
+/**
+ *
+ * @author Owner
+ */
 import java.io.File;
 import java.sql.CallableStatement;
 import java.sql.Connection;
@@ -28,10 +16,13 @@ import java.util.List;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
 import opennlp.tools.entitylinker.domain.BaseLink;
 import opennlp.tools.util.Span;
 
+/**
+ *
+ *
+ */
 public final class MySQLGeoNamesGazLinkable {
 
   private Connection con;
@@ -70,7 +61,7 @@ public final class MySQLGeoNamesGazLinka
     String driver = property.getProperty("mysql.driver", "org.gjt.mm.mysql.Driver");
     String url = property.getProperty("mysql.url", "jdbc:mysql://localhost:3306/world");
     String username = property.getProperty("mysql.username", "root");
-    String password = property.getProperty("mysql.password", "559447");
+    String password = property.getProperty("mysql.password", "?");
 
     Class.forName(driver);
     Connection conn = DriverManager.getConnection(url, username, password);
@@ -129,10 +120,10 @@ public final class MySQLGeoNamesGazLinka
 
         if (filterCountryContext) {
           if (countryCodes.contains(s.getCC1().toLowerCase())) {
-            System.out.println("qualified on: " + s.getCC1());
+          //  System.out.println(searchString +" GeoNames qualified on: " + s.getCC1());
             s.setRank(s.getRank() + 1.0);
           } else {
-             System.out.println(s.getFULL_NAME_ND_RO() + ", with CC1 of "+ s.getCC1()+ ", is not within countries discovered in the document. The Country list used to discover countries can be modified in mysql procedure getCountryList()");
+         //    System.out.println(s.getFULL_NAME_ND_RO() + ", with CC1 of "+ s.getCC1()+ ", is not within countries discovered in the document. The Country list used to discover countries can be modified in mysql procedure getCountryList()");
             continue;
           }
         }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java Mon Sep  2 21:43:40 2013
@@ -17,7 +17,12 @@ package opennlp.tools.entitylinker;
 
 import opennlp.tools.entitylinker.domain.BaseLink;
 
-public class MySQLUSGSGazEntry extends BaseLink {
+/**
+ *
+
+ */
+public class MySQLUSGSGazEntry extends BaseLink
+{
 
   private double rank;
   private String featureid;
@@ -28,76 +33,89 @@ public class MySQLUSGSGazEntry extends B
   private double primarylongitudeDEC;
   private String mapname;
 
-  public double getRank() {
+  public double getRank()
+  {
     return rank;
   }
 
-  public void setRank(double rank) {
+  public void setRank(double rank)
+  {
     this.rank = rank;
   }
 
-  public String getFeatureid() {
+  public String getFeatureid()
+  {
     return featureid;
   }
 
-  public void setFeatureid(String featureid) {
+  public void setFeatureid(String featureid)
+  {
     this.featureid = featureid;
   }
 
-  public String getFeaturename() {
+  public String getFeaturename()
+  {
     return featurename;
   }
 
-  public void setFeaturename(String featurename) {
+  public void setFeaturename(String featurename)
+  {
     this.featurename = featurename;
   }
 
-  public String getFeatureclass() {
+  public String getFeatureclass()
+  {
     return featureclass;
   }
 
-  public void setFeatureclass(String featureclass) {
+  public void setFeatureclass(String featureclass)
+  {
     this.featureclass = featureclass;
   }
 
-  public String getStatealpha() {
+  public String getStatealpha()
+  {
     return statealpha;
   }
 
-  public void setStatealpha(String statealpha) {
+  public void setStatealpha(String statealpha)
+  {
     this.statealpha = statealpha;
   }
 
-  public double getPrimarylatitudeDEC() {
+  public double getPrimarylatitudeDEC()
+  {
     return primarylatitudeDEC;
   }
 
-  public void setPrimarylatitudeDEC(double primarylatitudeDEC) {
+  public void setPrimarylatitudeDEC(double primarylatitudeDEC)
+  {
     this.primarylatitudeDEC = primarylatitudeDEC;
   }
 
-  public double getPrimarylongitudeDEC() {
+  public double getPrimarylongitudeDEC()
+  {
     return primarylongitudeDEC;
   }
 
-  public void setPrimarylongitudeDEC(double primarylongitudeDEC) {
+  public void setPrimarylongitudeDEC(double primarylongitudeDEC)
+  {
     this.primarylongitudeDEC = primarylongitudeDEC;
   }
 
-  public String getMapname() {
+  public String getMapname()
+  {
     return mapname;
   }
 
-  public void setMapname(String mapname) {
+  public void setMapname(String mapname)
+  {
     this.mapname = mapname;
   }
 
   @Override
   public String toString() {
-    return "MySQLUSGSGazEntry{" + "rank=" + rank + ", featureid=" + featureid
-        + ", featurename=" + featurename + ", featureclass=" + featureclass
-        + ", statealpha=" + statealpha + ", primarylatitudeDEC="
-        + primarylatitudeDEC + ", primarylongitudeDEC=" + primarylongitudeDEC
-        + ", mapname=" + mapname + "}\n\n";
+    return "MySQLUSGSGazEntry{" + "rank=" + rank + ", featureid=" + featureid + ", featurename=" + featurename + ", featureclass=" + featureclass + ", statealpha=" + statealpha + ", primarylatitudeDEC=" + primarylatitudeDEC + ", primarylongitudeDEC=" + primarylongitudeDEC + ", mapname=" + mapname + "}\n\n";
   }
+  
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker;
 
 import java.sql.CallableStatement;
@@ -27,10 +26,13 @@ import java.util.List;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
 import opennlp.tools.entitylinker.domain.BaseLink;
 import opennlp.tools.util.Span;
 
+/**
+ *
+ * @author opennlp
+ */
 public class MySQLUSGSGazLinkable {
 
   private Connection con;
@@ -67,7 +69,7 @@ public class MySQLUSGSGazLinkable {
     String driver = properties.getProperty("mysql.driver", "org.gjt.mm.mysql.Driver");
     String url = properties.getProperty("mysql.url", "jdbc:mysql://127.0.0.1:3306/world");
     String username = properties.getProperty("mysql.username", "root");
-    String password = properties.getProperty("mysql.password", "559447");
+    String password = properties.getProperty("mysql.password", "?");
 
     Class.forName(driver);
     Connection conn = DriverManager.getConnection(url, username, password);
@@ -103,8 +105,13 @@ public class MySQLUSGSGazLinkable {
         s.setPrimarylongitudeDEC(rs.getDouble(7));
         s.setMapname(rs.getString(8));
         if (countryCodes.contains("us")) {
-          s.setRank(s.getRank() + 1.0);
-          System.out.println("qualified on: US");
+          s.setRank(s.getRank() + (s.getRank() * .5));
+         // System.out.println(searchString +"USGS qualified on: " + s.getFeaturename());
+        } else {
+          s.setRank(s.getRank() * .5);
+          if(filterCountryContext){
+            continue;
+          }
         }
         retUrls.add(s);
       }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java Mon Sep  2 21:43:40 2013
@@ -18,6 +18,8 @@ package opennlp.tools.entitylinker.domai
 
 /**
  * Stores a minimal tuple of information. Intended to be used with LinkedSpan
+ *
+
  */
 public abstract class BaseLink {
 
@@ -91,8 +93,10 @@ public abstract class BaseLink {
     this.itemType = itemType;
   }
 
+
+
   @Override
   public String toString() {
     return "BaseLink{" + "itemID=" + itemID + ", itemName=" + itemName + ", itemType=" + itemType + '}';
   }
-}
+}
\ No newline at end of file

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java Mon Sep  2 21:43:40 2013
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.entitylinker.domain;
 
 import java.util.ArrayList;
@@ -22,14 +21,16 @@ import opennlp.tools.util.Span;
 /**
  * An "default" extended span that holds additional information about the Span
  *
-
+ *
  */
 public class LinkedSpan<T extends BaseLink> extends Span {
 
   private ArrayList<T> linkedEntries;
+  private int sentenceid = 0;
+  private String searchTerm;
 
- 
 
+  
   public LinkedSpan(ArrayList<T> linkedEntries, int s, int e, String type) {
     super(s, e, type);
     this.linkedEntries = linkedEntries;
@@ -53,8 +54,28 @@ public class LinkedSpan<T extends BaseLi
     this.linkedEntries = linkedEntries;
   }
 
+  public int getSentenceid() {
+    return sentenceid;
+  }
+
+  public void setSentenceid(int sentenceid) {
+    this.sentenceid = sentenceid;
+  }
+  public String getSearchTerm() {
+    return searchTerm;
+  }
+
+  public void setSearchTerm(String searchTerm) {
+    this.searchTerm = searchTerm;
+  }
   @Override
   public String toString() {
     return "LinkedSpan{" + "linkedEntries=" + linkedEntries + '}';
   }
-}
+
+
+
+
+
+  
+}
\ No newline at end of file

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java Mon Sep  2 21:43:40 2013
@@ -27,7 +27,11 @@ import java.util.Set;
 
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
 import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Span;
 
@@ -47,6 +51,15 @@ public class BratNameSampleStream extend
     this.tokenizer = tokenizer;
   }
   
+  protected BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel,
+      ObjectStream<BratDocument> samples) {
+    super(samples);
+    
+    // TODO: We can pass in custom validators here ... 
+    this.sentDetector = new SentenceDetectorME(sentModel);
+    this.tokenizer = new TokenizerME(tokenModel);
+  }
+  
   @Override
   protected List<NameSample> read(BratDocument sample) throws IOException {
     
@@ -64,6 +77,22 @@ public class BratNameSampleStream extend
     
     Span sentences[] = sentDetector.sentPosDetect(sample.getText());
     
+    // TODO: Sentence breaks should be avoided inside name annotations
+    // a) Merge two sentences, if an end/begin pair is part of a name annotation
+    // b) Implement a custom sentence validator which can be injected into the SD
+    
+    // How could a custom validator be injected into an already instantiated sentence detector?
+    // Via a set method ...
+    // Via constructor ... probably best option, but a bit tricky to work with the SD interface then
+    // 
+    
+    
+    // TODO: Token breaks should be enforced on name span boundaries
+    // a) Just split tokens
+    // b) Implement a custom token split validator which can be injected into the Tokenizer
+    
+    // Currently we are missing all 
+    
     List<NameSample> samples = new ArrayList<NameSample>(sentences.length);
     
     for (Span sentence : sentences) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java Mon Sep  2 21:43:40 2013
@@ -18,8 +18,10 @@
 package opennlp.tools.ml;
 
 import java.lang.reflect.Constructor;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import opennlp.tools.ml.maxent.GIS;
@@ -131,6 +133,7 @@ public class TrainerFactory {
         throw new IllegalArgumentException(msg, e);
       }
     }
+    
     return theTrainer;
   }
 }