You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2014/05/19 15:18:06 UTC
svn commit: r1595889 -
/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
Author: markg
Date: Mon May 19 13:18:06 2014
New Revision: 1595889
URL: http://svn.apache.org/r1595889
Log:
OPENNLP-698
Fixed cleanInput() method so it handles multi token names. Names are now returned in double quotes.
Modified:
opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java?rev=1595889&r1=1595888&r2=1595889&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java Mon May 19 13:18:06 2014
@@ -61,6 +61,8 @@ public class GazetteerSearcher {
private Analyzer usgsAnalyzer;
private EntityLinkerProperties properties;
+
+
public GazetteerSearcher(EntityLinkerProperties properties) throws Exception {
this.properties = properties;
init();
@@ -155,7 +157,7 @@ public class GazetteerSearcher {
* only want hits above the Levenshtein threshold
*/
if (normLev.compareTo(scoreCutoff) >= 0) {
- if (entry.getItemParentID().toLowerCase().equals(code.toLowerCase())) {
+ if (entry.getItemParentID().toLowerCase().equals(code.toLowerCase()) || code.toLowerCase().equals("")) {
entry.getScoreMap().put("normlucene", normLev);
//make sure we don't produce a duplicate
if (!linkedData.contains(entry)) {
@@ -186,7 +188,7 @@ public class GazetteerSearcher {
*/
public ArrayList<GazetteerEntry> usgsFind(String searchString, int rowsReturned) {
ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
- searchString = cleanInput(searchString);
+ searchString = cleanInput(searchString);
if (searchString.isEmpty()) {
return linkedData;
}
@@ -269,8 +271,15 @@ public class GazetteerSearcher {
return linkedData;
}
+ /**
+ * Replaces any noise chars with a single space, trims the result, and wraps it in double quotes
+ * @param input
+ * @return
+ */
private String cleanInput(String input) {
- return input.replaceAll(REGEX_CLEAN, "").trim();
+ String output = input.replaceAll(REGEX_CLEAN, " ").trim();
+ System.out.println(output);
+ return "\"" + output + "\"";
}
private void init() throws Exception {