You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2013/11/14 13:16:46 UTC

svn commit: r1541887 - in /opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker: GazateerIndexer.java GazateerSearcher.java GeoEntityLinkerSetupUtils.java

Author: markg
Date: Thu Nov 14 12:16:46 2013
New Revision: 1541887

URL: http://svn.apache.org/r1541887
Log:
OPENNLP-579
Fixed a bug in the GazateerIndexer. Refined the SetupUtils.

Modified:
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
    opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java

Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java Thu Nov 14 12:16:46 2013
@@ -38,12 +38,22 @@ import org.apache.lucene.util.Version;
  */
 public class GazateerIndexer {
 
-  public enum GazType {
+  public static interface Separable {
+
+    String getSeparator();
+  }
+
+  public enum GazType implements Separable {
 
     GEONAMES {
       @Override
       public String toString() {
-        return "/opennlp_geoentitylinker_usgsgaz_idx";
+        return "/opennlp_geoentitylinker_geonames_idx";
+      }
+
+      @Override
+      public String getSeparator() {
+        return "\t";
       }
     },
     USGS {
@@ -51,6 +61,11 @@ public class GazateerIndexer {
       public String toString() {
         return "/opennlp_geoentitylinker_usgsgaz_idx";
       }
+
+      @Override
+      public String getSeparator() {
+        return "\\|";
+      }
     }
   }
 
@@ -67,24 +82,24 @@ public class GazateerIndexer {
 
     IndexWriter w = new IndexWriter(index, config);
 
-    readFile(gazateerInputData, w);
+    readFile(gazateerInputData, w, type);
     w.commit();
     w.close();
 
   }
 
-  public void readFile(File gazateerInputData, IndexWriter w) throws Exception {
+  public void readFile(File gazateerInputData, IndexWriter w, GazType type) throws Exception {
     BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData));
     List<String> fields = new ArrayList<String>();
     int counter = 0;
     System.out.println("reading gazateer data from file...........");
     while (reader.read() != -1) {
       String line = reader.readLine();
-      String[] values = line.split("\\|");//nga format
+      String[] values = line.split(type.getSeparator());
       if (counter == 0) {
         // build fields
         for (String columnName : values) {
-          fields.add(columnName.replace("»¿", ""));
+          fields.add(columnName.replace("»¿", "").trim());
         }
 
 
@@ -102,6 +117,7 @@ public class GazateerIndexer {
       }
 
     }
-
+    w.commit();
+    System.out.println("Completed indexing gaz! index name is: " + type.toString());
   }
 }

Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java Thu Nov 14 12:16:46 2013
@@ -70,9 +70,10 @@ public class GazateerSearcher {
         geonamesReader = DirectoryReader.open(geonamesIndex);
         geonamesSearcher = new IndexSearcher(geonamesReader);
         geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
+
       }
 
-      String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^100";
+      String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^10000";
       QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);
       Query q = parser.parse(luceneQueryString);
 

Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java Thu Nov 14 12:16:46 2013
@@ -83,7 +83,7 @@ public class GeoEntityLinkerSetupUtils {
         }
       }
     }
-    System.out.println("Document processing complete. Writing traininf data to file");
+    System.out.println("Document processing complete. Writing training data to "+ annotationOutFile.getAbsolutePath());
     writer.close();
     System.out.println("Building Doccat model...");
     DoccatModel model = null;
@@ -116,7 +116,7 @@ public class GeoEntityLinkerSetupUtils {
    * @param radius
    * @return
    */
-  public static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
+  private static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
     Map<String, ArrayList< String>> featureBags = new HashMap<>();
     Map<String, Set<Integer>> countryMentions = additionalContext.getCountryMentions();
     /**