You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/02 23:43:41 UTC
svn commit: r1519520 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: entitylinker/
entitylinker/domain/ formats/brat/ ml/
Author: joern
Date: Mon Sep 2 21:43:40 2013
New Revision: 1519520
URL: http://svn.apache.org/r1519520
Log:
OPENNLP-588 GeoEntityLinker does not provide a method for setting the properties file location in order to get the database connection; it is currently hard-coded
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseEntityLinker.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.util.ArrayList;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContext.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.sql.CallableStatement;
@@ -25,8 +24,12 @@ import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
+/**
+ * Finds instances of country mentions in a String, typically a document text.
+ * Used to boost or degrade scoring of linked geo entities.
+
+ */
public class CountryContext {
private Connection con;
@@ -47,7 +50,7 @@ public class CountryContext {
for (CountryContextEntry entry : countrydata) {
if (docText.contains(entry.getFull_name_nd_ro())) {
- System.out.println("hit on " + entry.getFull_name_nd_ro());
+ System.out.println("\tFound Country indicator: " + entry.getFull_name_nd_ro());
CountryContextHit hit = new CountryContextHit(entry.getCc1(), docText.indexOf(entry.getFull_name_nd_ro()), docText.indexOf(entry.getFull_name_nd_ro()+ entry.getFull_name_nd_ro().length()));
hits.add(hit);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextEntry.java Mon Sep 2 21:43:40 2013
@@ -13,11 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
/**
- *
+ * Stores a tuple from MySQL that is used to find country mentions in document text.
*
*/
public class CountryContextEntry {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/CountryContextHit.java Mon Sep 2 21:43:40 2013
@@ -13,11 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
/**
- *
+ * Stores a "hit" on a country and the start and end of the hit.
*/
public class CountryContextHit {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.util.List;
@@ -33,6 +32,12 @@ import opennlp.tools.util.Span;
public interface EntityLinker<T extends Span> {
/**
+ * Allows properties to be passed through the EntityLinkerFactory into all implementations dynamically.
+ * @param properties the EntityLinkerProperties object that contains properties needed by the impl
+ */
+ void setEntityLinkerProperties(EntityLinkerProperties properties);
+
+ /**
*
* @param text the document text to be used as additional context, and to
* derive sentences and tokens String[]
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.io.IOException;
@@ -21,11 +20,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import opennlp.tools.entitylinker.GeoEntityLinker;
/**
- * Generates Lists of EntityLinker implementations via
- * properties file configuration
+ * Generates Lists of EntityLinker implementations via properties file
+ * configuration
*
*/
public class EntityLinkerFactory {
@@ -35,29 +33,35 @@ public class EntityLinkerFactory {
* consists of a comma separated list of full class names. The entityType is
* used to build the key to the properties entry. the entityType will be
* prefixed with "linker." Therefore, a compliant property entry for location
- * entity linker types would be:
- * linker.<yourtype>=<yourclass1,yourclass2>
- * For example:
+ * entity linker types would be: linker.<yourtype>=<yourclass1,yourclass2> For
+ * example:
* linker.location=opennlp.tools.entitylinker.GeoEntityLinker,opennlp.tools.entitylinker.GeoEntityLinker2
*
*
* @param entityType the type of entity, the same as what would be returned
* from span.getType()
- * @param properties the entitylinker properties that contain the configured entitylinkers
- * @return
-
+ * @param properties the entitylinker properties that contain the configured
+ * entitylinkers
+ * @return the list of EntityLinker instances created for the given entity type
*/
public static synchronized List<EntityLinker> getLinkers(String entityType, EntityLinkerProperties properties) {
List<EntityLinker> linkers = new ArrayList<EntityLinker>();
try {
- String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
+ String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
for (String classname : listoflinkers.split(",")) {
Class theClass = Class.forName(classname);
EntityLinker linker = (EntityLinker) theClass.newInstance();
System.out.println("EntityLinker factory instantiated: " + linker.getClass().getName());
+ linker.setEntityLinkerProperties(properties);
linkers.add(linker);
}
- } catch (Exception ex) {
+ } catch (InstantiationException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (IllegalAccessException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (ClassNotFoundException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (IOException ex) {
Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
}
return linkers;
@@ -65,8 +69,10 @@ public class EntityLinkerFactory {
/**
*
- * @param entityTypes the types of entities, i.e person, location, organization
- * @param properties the entitylinker properties that contain the configured entitylinkers
+ * @param entityTypes the types of entities, i.e person, location,
+ * organization
+ * @param properties the entitylinker properties that contain the configured
+ * entitylinkers
* @return
*/
public static synchronized List<EntityLinker> getLinkers(String[] entityTypes, EntityLinkerProperties properties) {
@@ -74,14 +80,21 @@ public class EntityLinkerFactory {
for (String entityType : entityTypes) {
try {
- String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
+ String listoflinkers = properties.getProperty("linker." + entityType, GeoEntityLinker.class.getName());
for (String classname : listoflinkers.split(",")) {
Class theClass = Class.forName(classname);
EntityLinker linker = (EntityLinker) theClass.newInstance();
System.out.println("EntityLinker factory instantiated: " + linker.getClass().getName());
+ linker.setEntityLinkerProperties(properties);
linkers.add(linker);
}
- } catch (Exception ex) {
+ } catch (InstantiationException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (IllegalAccessException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (ClassNotFoundException ex) {
+ Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
+ } catch (IOException ex) {
Logger.getLogger(EntityLinkerFactory.class.getName()).log(Level.SEVERE, null, ex);
}
@@ -89,6 +102,4 @@ public class EntityLinkerFactory {
return linkers;
}
-
-
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.io.File;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/GeoEntityLinker.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.io.File;
@@ -22,28 +21,29 @@ import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinker;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
import opennlp.tools.entitylinker.domain.BaseLink;
import opennlp.tools.entitylinker.domain.LinkedSpan;
import opennlp.tools.util.Span;
/**
* Links location entities to gazatteers.
+ *
+ *
*/
public class GeoEntityLinker implements EntityLinker<LinkedSpan> {
- MySQLGeoNamesGazLinkable geoNamesGaz;// = new MySQLGeoNamesGazLinkable();
- MySQLUSGSGazLinkable usgsGaz;//= new MySQLUSGSGazLinkable();
- CountryContext countryContext;
- List<CountryContextHit> hits;
- EntityLinkerProperties props;
+ private MySQLGeoNamesGazLinkable geoNamesGaz;// = new MySQLGeoNamesGazLinkable();
+ private MySQLUSGSGazLinkable usgsGaz;//= new MySQLUSGSGazLinkable();
+ private CountryContext countryContext;
+ private List<CountryContextHit> hits;
+ private EntityLinkerProperties props;
public GeoEntityLinker() {
if (geoNamesGaz == null || usgsGaz == null) {
geoNamesGaz = new MySQLGeoNamesGazLinkable();
usgsGaz = new MySQLUSGSGazLinkable();
countryContext = new CountryContext();
+
}
}
@@ -54,15 +54,18 @@ public class GeoEntityLinker implements
props = new EntityLinkerProperties(new File("C:\\temp\\opennlpmodels\\entitylinker.properties"));
}
if (hits == null) {
+ System.out.println("getting country context");
hits = countryContext.find(text, props);
}
-
+
String[] matches = Span.spansToStrings(names, tokens);
for (int i = 0; i < matches.length; i++) {
+ System.out.println("processing match " + i + " of " + matches.length);
ArrayList<BaseLink> geoNamesEntries = geoNamesGaz.find(matches[i], names[i], hits, props);
ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
- LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
+ LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i].getStart(), names[i].getEnd());
geoSpans.getLinkedEntries().addAll(usgsEntries);
+ geoSpans.setSearchTerm(matches[i]);
spans.add(geoSpans);
}
return spans;
@@ -93,6 +96,7 @@ public class GeoEntityLinker implements
ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
geoSpans.getLinkedEntries().addAll(usgsEntries);
+ geoSpans.setSearchTerm(matches[i]);
spans.add(geoSpans);
}
return spans;
@@ -110,7 +114,7 @@ public class GeoEntityLinker implements
props = new EntityLinkerProperties(new File("C:\\temp\\opennlpmodels\\entitylinker.properties"));
}
List<CountryContextHit> hits = countryContext.find(text, props);
-
+
Span s = sentences[sentenceIndex];
String sentence = text.substring(s.getStart(), s.getEnd());
@@ -123,6 +127,8 @@ public class GeoEntityLinker implements
ArrayList<BaseLink> usgsEntries = usgsGaz.find(matches[i], names[i], hits, props);
LinkedSpan<BaseLink> geoSpans = new LinkedSpan<BaseLink>(geoNamesEntries, names[i], 0);
geoSpans.getLinkedEntries().addAll(usgsEntries);
+ geoSpans.setSearchTerm(matches[i]);
+ geoSpans.setSentenceid(sentenceIndex);
spans.add(geoSpans);
}
@@ -131,4 +137,8 @@ public class GeoEntityLinker implements
}
return spans;
}
+
+ public void setEntityLinkerProperties(EntityLinkerProperties properties) {
+ this.props = properties;
+ }
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazEntry.java Mon Sep 2 21:43:40 2013
@@ -13,11 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import opennlp.tools.entitylinker.domain.BaseLink;
+/**
+ *
+
+ */
public class MySQLGeoNamesGazEntry extends BaseLink
{
////actual fields returned
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLGeoNamesGazLinkable.java Mon Sep 2 21:43:40 2013
@@ -1,21 +1,9 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
package opennlp.tools.entitylinker;
+/**
+ *
+ * @author Owner
+ */
import java.io.File;
import java.sql.CallableStatement;
import java.sql.Connection;
@@ -28,10 +16,13 @@ import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
import opennlp.tools.entitylinker.domain.BaseLink;
import opennlp.tools.util.Span;
+/**
+ *
+ *
+ */
public final class MySQLGeoNamesGazLinkable {
private Connection con;
@@ -70,7 +61,7 @@ public final class MySQLGeoNamesGazLinka
String driver = property.getProperty("mysql.driver", "org.gjt.mm.mysql.Driver");
String url = property.getProperty("mysql.url", "jdbc:mysql://localhost:3306/world");
String username = property.getProperty("mysql.username", "root");
- String password = property.getProperty("mysql.password", "559447");
+ String password = property.getProperty("mysql.password", "?");
Class.forName(driver);
Connection conn = DriverManager.getConnection(url, username, password);
@@ -129,10 +120,10 @@ public final class MySQLGeoNamesGazLinka
if (filterCountryContext) {
if (countryCodes.contains(s.getCC1().toLowerCase())) {
- System.out.println("qualified on: " + s.getCC1());
+ // System.out.println(searchString +" GeoNames qualified on: " + s.getCC1());
s.setRank(s.getRank() + 1.0);
} else {
- System.out.println(s.getFULL_NAME_ND_RO() + ", with CC1 of "+ s.getCC1()+ ", is not within countries discovered in the document. The Country list used to discover countries can be modified in mysql procedure getCountryList()");
+ // System.out.println(s.getFULL_NAME_ND_RO() + ", with CC1 of "+ s.getCC1()+ ", is not within countries discovered in the document. The Country list used to discover countries can be modified in mysql procedure getCountryList()");
continue;
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazEntry.java Mon Sep 2 21:43:40 2013
@@ -17,7 +17,12 @@ package opennlp.tools.entitylinker;
import opennlp.tools.entitylinker.domain.BaseLink;
-public class MySQLUSGSGazEntry extends BaseLink {
+/**
+ *
+
+ */
+public class MySQLUSGSGazEntry extends BaseLink
+{
private double rank;
private String featureid;
@@ -28,76 +33,89 @@ public class MySQLUSGSGazEntry extends B
private double primarylongitudeDEC;
private String mapname;
- public double getRank() {
+ public double getRank()
+ {
return rank;
}
- public void setRank(double rank) {
+ public void setRank(double rank)
+ {
this.rank = rank;
}
- public String getFeatureid() {
+ public String getFeatureid()
+ {
return featureid;
}
- public void setFeatureid(String featureid) {
+ public void setFeatureid(String featureid)
+ {
this.featureid = featureid;
}
- public String getFeaturename() {
+ public String getFeaturename()
+ {
return featurename;
}
- public void setFeaturename(String featurename) {
+ public void setFeaturename(String featurename)
+ {
this.featurename = featurename;
}
- public String getFeatureclass() {
+ public String getFeatureclass()
+ {
return featureclass;
}
- public void setFeatureclass(String featureclass) {
+ public void setFeatureclass(String featureclass)
+ {
this.featureclass = featureclass;
}
- public String getStatealpha() {
+ public String getStatealpha()
+ {
return statealpha;
}
- public void setStatealpha(String statealpha) {
+ public void setStatealpha(String statealpha)
+ {
this.statealpha = statealpha;
}
- public double getPrimarylatitudeDEC() {
+ public double getPrimarylatitudeDEC()
+ {
return primarylatitudeDEC;
}
- public void setPrimarylatitudeDEC(double primarylatitudeDEC) {
+ public void setPrimarylatitudeDEC(double primarylatitudeDEC)
+ {
this.primarylatitudeDEC = primarylatitudeDEC;
}
- public double getPrimarylongitudeDEC() {
+ public double getPrimarylongitudeDEC()
+ {
return primarylongitudeDEC;
}
- public void setPrimarylongitudeDEC(double primarylongitudeDEC) {
+ public void setPrimarylongitudeDEC(double primarylongitudeDEC)
+ {
this.primarylongitudeDEC = primarylongitudeDEC;
}
- public String getMapname() {
+ public String getMapname()
+ {
return mapname;
}
- public void setMapname(String mapname) {
+ public void setMapname(String mapname)
+ {
this.mapname = mapname;
}
@Override
public String toString() {
- return "MySQLUSGSGazEntry{" + "rank=" + rank + ", featureid=" + featureid
- + ", featurename=" + featurename + ", featureclass=" + featureclass
- + ", statealpha=" + statealpha + ", primarylatitudeDEC="
- + primarylatitudeDEC + ", primarylongitudeDEC=" + primarylongitudeDEC
- + ", mapname=" + mapname + "}\n\n";
+ return "MySQLUSGSGazEntry{" + "rank=" + rank + ", featureid=" + featureid + ", featurename=" + featurename + ", featureclass=" + featureclass + ", statealpha=" + statealpha + ", primarylatitudeDEC=" + primarylatitudeDEC + ", primarylongitudeDEC=" + primarylongitudeDEC + ", mapname=" + mapname + "}\n\n";
}
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/MySQLUSGSGazLinkable.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker;
import java.sql.CallableStatement;
@@ -27,10 +26,13 @@ import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
-import opennlp.tools.entitylinker.EntityLinkerProperties;
import opennlp.tools.entitylinker.domain.BaseLink;
import opennlp.tools.util.Span;
+/**
+ *
+ * @author opennlp
+ */
public class MySQLUSGSGazLinkable {
private Connection con;
@@ -67,7 +69,7 @@ public class MySQLUSGSGazLinkable {
String driver = properties.getProperty("mysql.driver", "org.gjt.mm.mysql.Driver");
String url = properties.getProperty("mysql.url", "jdbc:mysql://127.0.0.1:3306/world");
String username = properties.getProperty("mysql.username", "root");
- String password = properties.getProperty("mysql.password", "559447");
+ String password = properties.getProperty("mysql.password", "?");
Class.forName(driver);
Connection conn = DriverManager.getConnection(url, username, password);
@@ -103,8 +105,13 @@ public class MySQLUSGSGazLinkable {
s.setPrimarylongitudeDEC(rs.getDouble(7));
s.setMapname(rs.getString(8));
if (countryCodes.contains("us")) {
- s.setRank(s.getRank() + 1.0);
- System.out.println("qualified on: US");
+ s.setRank(s.getRank() + (s.getRank() * .5));
+ // System.out.println(searchString +"USGS qualified on: " + s.getFeaturename());
+ } else {
+ s.setRank(s.getRank() * .5);
+ if(filterCountryContext){
+ continue;
+ }
}
retUrls.add(s);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/BaseLink.java Mon Sep 2 21:43:40 2013
@@ -18,6 +18,8 @@ package opennlp.tools.entitylinker.domai
/**
* Stores a minimal tuple of information. Intended to be used with LinkedSpan
+ *
+
*/
public abstract class BaseLink {
@@ -91,8 +93,10 @@ public abstract class BaseLink {
this.itemType = itemType;
}
+
+
@Override
public String toString() {
return "BaseLink{" + "itemID=" + itemID + ", itemName=" + itemName + ", itemType=" + itemType + '}';
}
-}
+}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/domain/LinkedSpan.java Mon Sep 2 21:43:40 2013
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.entitylinker.domain;
import java.util.ArrayList;
@@ -22,14 +21,16 @@ import opennlp.tools.util.Span;
/**
* An "default" extended span that holds additional information about the Span
*
-
+ *
*/
public class LinkedSpan<T extends BaseLink> extends Span {
private ArrayList<T> linkedEntries;
+ private int sentenceid = 0;
+ private String searchTerm;
-
+
public LinkedSpan(ArrayList<T> linkedEntries, int s, int e, String type) {
super(s, e, type);
this.linkedEntries = linkedEntries;
@@ -53,8 +54,28 @@ public class LinkedSpan<T extends BaseLi
this.linkedEntries = linkedEntries;
}
+ public int getSentenceid() {
+ return sentenceid;
+ }
+
+ public void setSentenceid(int sentenceid) {
+ this.sentenceid = sentenceid;
+ }
+ public String getSearchTerm() {
+ return searchTerm;
+ }
+
+ public void setSearchTerm(String searchTerm) {
+ this.searchTerm = searchTerm;
+ }
@Override
public String toString() {
return "LinkedSpan{" + "linkedEntries=" + linkedEntries + '}';
}
-}
+
+
+
+
+
+
+}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java Mon Sep 2 21:43:40 2013
@@ -27,7 +27,11 @@ import java.util.Set;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
@@ -47,6 +51,15 @@ public class BratNameSampleStream extend
this.tokenizer = tokenizer;
}
+ protected BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel,
+ ObjectStream<BratDocument> samples) {
+ super(samples);
+
+ // TODO: We can pass in custom validators here ...
+ this.sentDetector = new SentenceDetectorME(sentModel);
+ this.tokenizer = new TokenizerME(tokenModel);
+ }
+
@Override
protected List<NameSample> read(BratDocument sample) throws IOException {
@@ -64,6 +77,22 @@ public class BratNameSampleStream extend
Span sentences[] = sentDetector.sentPosDetect(sample.getText());
+ // TODO: Sentence breaks should be avoided inside name annotations
+ // a) Merge two sentences, if an end/begin pair is part of a name annotation
+ // b) Implement a custom sentence validator which can be injected into the SD
+
+ // How could a custom validator be injected into an already instantiated sentence detector?
+ // Via a set method ...
+ // Via constructor ... probably best option, but a bit tricky to work with the SD interface then
+ //
+
+
+ // TODO: Token breaks should be enforced on name span boundaries
+ // a) Just split tokens
+ // b) Implement a custom token split validator which can be injected into the Tokenizer
+
+ // Currently we are missing all
+
List<NameSample> samples = new ArrayList<NameSample>(sentences.length);
for (Span sentence : sentences) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java?rev=1519520&r1=1519519&r2=1519520&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java Mon Sep 2 21:43:40 2013
@@ -18,8 +18,10 @@
package opennlp.tools.ml;
import java.lang.reflect.Constructor;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import opennlp.tools.ml.maxent.GIS;
@@ -131,6 +133,7 @@ public class TrainerFactory {
throw new IllegalArgumentException(msg, e);
}
}
+
return theTrainer;
}
}