You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2014/05/19 15:18:06 UTC

svn commit: r1595889 - /opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java

Author: markg
Date: Mon May 19 13:18:06 2014
New Revision: 1595889

URL: http://svn.apache.org/r1595889
Log:
OPENNLP-698
Fixed the cleanInput() method so that it handles multi-token names. Cleaned names are now returned wrapped in double quotes.

Modified:
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java?rev=1595889&r1=1595888&r2=1595889&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java Mon May 19 13:18:06 2014
@@ -61,6 +61,8 @@ public class GazetteerSearcher {
   private Analyzer usgsAnalyzer;
   private EntityLinkerProperties properties;
 
+
+
   public GazetteerSearcher(EntityLinkerProperties properties) throws Exception {
     this.properties = properties;
     init();
@@ -155,7 +157,7 @@ public class GazetteerSearcher {
          * only want hits above the levenstein thresh
          */
         if (normLev.compareTo(scoreCutoff) >= 0) {
-          if (entry.getItemParentID().toLowerCase().equals(code.toLowerCase())) {
+          if (entry.getItemParentID().toLowerCase().equals(code.toLowerCase()) || code.toLowerCase().equals("")) {
             entry.getScoreMap().put("normlucene", normLev);
             //make sure we don't produce a duplicate
             if (!linkedData.contains(entry)) {
@@ -186,7 +188,7 @@ public class GazetteerSearcher {
    */
   public ArrayList<GazetteerEntry> usgsFind(String searchString, int rowsReturned) {
     ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
-     searchString = cleanInput(searchString);
+    searchString = cleanInput(searchString);
     if (searchString.isEmpty()) {
       return linkedData;
     }
@@ -269,8 +271,15 @@ public class GazetteerSearcher {
     return linkedData;
   }
 
+  /**
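+   * Replaces any noise chars (matches of REGEX_CLEAN) with a space, trims the result and wraps it in double quotes.
+   * @param input the raw search string to clean
+   * @return the cleaned search string, enclosed in double quotes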
+   */
   private String cleanInput(String input) {
-    return input.replaceAll(REGEX_CLEAN, "").trim();
+    String output = input.replaceAll(REGEX_CLEAN, " ").trim();
+    System.out.println(output);
+    return "\"" + output + "\"";
   }
 
   private void init() throws Exception {