You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2014/05/12 21:30:22 UTC

svn commit: r1594067 - in /opennlp/addons/geoentitylinker-addon: ./ src/main/java/opennlp/addons/geoentitylinker/

Author: markg
Date: Mon May 12 19:30:21 2014
New Revision: 1594067

URL: http://svn.apache.org/r1594067
Log:
OPENNLP-693
OPENNLP-694
OPENNLP-692
Added log4j logging. Added Lucene Spatial. Removed the optional tags from the pom for the Lucene dependencies. Also added string sanitizing to the Gazetteer searcher so that Lucene will stop logging syntax problems on noisy NER results.

Modified:
    opennlp/addons/geoentitylinker-addon/pom.xml
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/CountryContext.java
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/ModelBasedScorer.java
    opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/PointClustering.java

Modified: opennlp/addons/geoentitylinker-addon/pom.xml
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/pom.xml?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/pom.xml (original)
+++ opennlp/addons/geoentitylinker-addon/pom.xml Mon May 12 19:30:21 2014
@@ -1,66 +1,73 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.opennlp</groupId>
-    <artifactId>opennlp</artifactId>
-    <version>1.6.0-SNAPSHOT</version>
-    <relativePath>../opennlp/pom.xml</relativePath>
-  </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp</artifactId>
+        <version>1.6.0-SNAPSHOT</version>
+        <relativePath>../opennlp/pom.xml</relativePath>
+    </parent>
 
-  <artifactId>geoentitylinker-addon</artifactId>
-  <version>1.0-SNAPSHOT</version>
-  <packaging>jar</packaging>
-  <name>geoentitylinker-addon</name>
+    <artifactId>geoentitylinker-addon</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+    <name>geoentitylinker-addon</name>
 
-  <url>http://maven.apache.org</url>
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>2.3.2</version>
-        <configuration>
-          <source>1.7</source>
-          <target>1.7</target>
-        </configuration>
-      </plugin>
-    </plugins>
-  </build>
-  <properties>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-  </properties>
+    <url>http://maven.apache.org</url>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>2.3.2</version>
+                <configuration>
+                    <source>1.7</source>
+                    <target>1.7</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>3.8.1</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-core</artifactId>
-      <version>4.5.0</version>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-analyzers-common</artifactId>
-      <version>4.5.0</version>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-queryparser</artifactId>
-      <version>4.5.0</version>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.opennlp</groupId>
-      <artifactId>opennlp-tools</artifactId>
-      <version>1.6.0-SNAPSHOT</version>
-      <optional>true</optional>
-    </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>3.8.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <version>1.2.16</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-spatial</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+            
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-core</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-queryparser</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.opennlp</groupId>
+            <artifactId>opennlp-tools</artifactId>
+            <version>1.6.0-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
 </project>

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/CountryContext.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/CountryContext.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/CountryContext.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/CountryContext.java Mon May 12 19:30:21 2014
@@ -25,11 +25,11 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import opennlp.tools.entitylinker.EntityLinkerProperties;
+import org.apache.log4j.Logger;
 
 /**
  * Finds instances of country mentions in a String, typically a document text.
@@ -38,22 +38,23 @@ import opennlp.tools.entitylinker.Entity
  */
 public class CountryContext {
 
+  private static final Logger LOGGER = Logger.getLogger(CountryContext.class);
   private List<CountryContextEntry> countrydata;
   private Map<String, Set<String>> nameCodesMap = new HashMap<>();
   private Map<String, Set<Integer>> countryMentions = new HashMap<>();
   private Set<CountryContextEntry> countryHits = new HashSet<>();
   private EntityLinkerProperties properties;
-
+  
   public CountryContext(EntityLinkerProperties properties) throws Exception {
     this.properties = properties;
     if (countrydata == null) {
       String path = this.properties.getProperty("opennlp.geoentitylinker.countrycontext.filepath", "");
-
+      
       File countryContextFile = new File(path);
       countrydata = getCountryContextFromFile(countryContextFile);
     }
   }
-
+  
   public Map<String, Set<Integer>> getCountryMentions() {
     return countryMentions;
   }
@@ -75,7 +76,7 @@ public class CountryContext {
   public Map<String, Set<String>> getNameCodesMap() {
     return nameCodesMap;
   }
-
+  
   public void setNameCodesMap(Map<String, Set<String>> nameCodesMap) {
     this.nameCodesMap = nameCodesMap;
   }
@@ -90,7 +91,7 @@ public class CountryContext {
    * Finding mentions in documents is very helpful for scoring. Lazily loads the
    * list from the file.
    *
-   * @param docText    the full text of the document
+   * @param docText the full text of the document
    * @param properties EntityLinkerProperties for getting database connection
    * @return
    */
@@ -98,13 +99,12 @@ public class CountryContext {
     countryMentions = new HashMap<>();
     nameCodesMap.clear();
     try {
-
-
+      
       for (CountryContextEntry entry : countrydata) {
         Pattern regex = Pattern.compile(entry.getFull_name_nd_ro().trim(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
         Matcher rs = regex.matcher(docText);
         String code = entry.getCc1().toLowerCase();
-
+        
         boolean found = false;
         while (rs.find()) {
           found = true;
@@ -130,27 +130,26 @@ public class CountryContext {
         if (found) {
           countryHits.add(entry);
         }
-
+        
       }
-
+      
     } catch (Exception ex) {
-      Logger.getLogger(CountryContext.class.getName()).log(Level.SEVERE, null, ex);
+      LOGGER.error(ex);
     }
-
-
+    
     return countryMentions;
   }
-
+  
   private List<CountryContextEntry> getCountryContextFromFile(File countryContextFile) {
     List<CountryContextEntry> entries = new ArrayList<>();
     String path = countryContextFile.getPath();
     BufferedReader reader;
-
+    
     try {
       path = properties.getProperty("opennlp.geoentitylinker.countrycontext.filepath", "");
-
+      
       reader = new BufferedReader(new FileReader(path));
-
+      
       while (reader.read() != -1) {
         String line = reader.readLine();
         String[] values = line.split("\t");
@@ -166,10 +165,10 @@ public class CountryContext {
         entries.add(entry);
       }
       reader.close();
-    } catch (IOException e) {
-      System.err.println(e);
+    } catch (IOException ex) {
+      LOGGER.error(ex);
     }
     return entries;
-
+    
   }
 }

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java Mon May 12 19:30:21 2014
@@ -94,8 +94,8 @@ public class GazetteerIndexer {
     String indexloc = outputIndexDir + type.toString();
     Directory index = new MMapDirectory(new File(indexloc));
 
-    Analyzer a = new StandardAnalyzer(Version.LUCENE_45, new CharArraySet(Version.LUCENE_45, new ArrayList(), true));
-    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, a);
+    Analyzer a = new StandardAnalyzer(Version.LUCENE_48, new CharArraySet(Version.LUCENE_48, new ArrayList(), true));
+    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, a);
 
     IndexWriter w = new IndexWriter(index, config);
 

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java Mon May 12 19:30:21 2014
@@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.util.Version;
 import opennlp.tools.entitylinker.EntityLinkerProperties;
+import org.apache.log4j.Logger;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
@@ -46,6 +47,8 @@ import org.apache.lucene.analysis.util.C
  */
 public class GazetteerSearcher {
 
+  private final String REGEX_CLEAN = "[^\\p{L}\\p{Nd}]";
+  private static final Logger LOGGER = Logger.getLogger(GazetteerSearcher.class);
   private double scoreCutoff = .90;
   private Directory geonamesIndex;//= new MMapDirectory(new File(indexloc));
   private IndexReader geonamesReader;// = DirectoryReader.open(geonamesIndex);
@@ -67,13 +70,16 @@ public class GazetteerSearcher {
    *
    * @param searchString the named entity to look up in the lucene index
    * @param rowsReturned how many rows to allow lucene to return
-   * @param code         the country code
+   * @param code the country code
    *
    * @return
    */
   public ArrayList<GazetteerEntry> geonamesFind(String searchString, int rowsReturned, String code) {
     ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
-
+    searchString = cleanInput(searchString);
+    if (searchString.isEmpty()) {
+      return linkedData;
+    }
     try {
       /**
        * build the search string Sometimes no country context is found. In this
@@ -91,7 +97,7 @@ public class GazetteerSearcher {
         return get;
       }
 
-      QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);
+      QueryParser parser = new QueryParser(Version.LUCENE_48, luceneQueryString, geonamesAnalyzer);
       Query q = parser.parse(luceneQueryString);
 
       TopDocs search = geonamesSearcher.search(q, rowsReturned);
@@ -164,7 +170,7 @@ public class GazetteerSearcher {
       }
 
     } catch (IOException | ParseException ex) {
-      System.err.println(ex);
+      LOGGER.error(ex);
     }
 
     return linkedData;
@@ -180,6 +186,10 @@ public class GazetteerSearcher {
    */
   public ArrayList<GazetteerEntry> usgsFind(String searchString, int rowsReturned) {
     ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
+     searchString = cleanInput(searchString);
+    if (searchString.isEmpty()) {
+      return linkedData;
+    }
     String luceneQueryString = "FEATURE_NAME:" + searchString.toLowerCase().trim() + " OR MAP_NAME: " + searchString.toLowerCase().trim();
     try {
 
@@ -191,7 +201,7 @@ public class GazetteerSearcher {
         //if the name is already there, return the list of cavhed results
         return get;
       }
-      QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, usgsAnalyzer);
+      QueryParser parser = new QueryParser(Version.LUCENE_48, luceneQueryString, usgsAnalyzer);
       Query q = parser.parse(luceneQueryString);
 
       TopDocs search = usgsSearcher.search(q, rowsReturned);
@@ -253,30 +263,34 @@ public class GazetteerSearcher {
       }
 
     } catch (IOException | ParseException ex) {
-      System.err.println(ex);
+      LOGGER.error(ex);
     }
 
     return linkedData;
   }
 
+  private String cleanInput(String input) { // sanitizes a raw search string before it is placed into a Lucene query string
+    return input.replaceAll(REGEX_CLEAN, "").trim(); // NOTE(review): REGEX_CLEAN strips every non-letter/non-digit char, whitespace included, so multi-word names are joined - confirm this is intended
+  }
+
   private void init() throws Exception {
     if (usgsIndex == null) {
       String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.usgs", "");
       if (indexloc.equals("")) {
-        System.out.println("USGS Gaz location not found");
-
+        // System.out.println("USGS Gaz location not found");
+        LOGGER.error(new Exception("USGS Gaz location not found"));
       }
       String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", String.valueOf(scoreCutoff));
       scoreCutoff = Double.valueOf(cutoff);
       usgsIndex = new MMapDirectory(new File(indexloc));
       usgsReader = DirectoryReader.open(usgsIndex);
       usgsSearcher = new IndexSearcher(usgsReader);
-      usgsAnalyzer = new StandardAnalyzer(Version.LUCENE_45, new CharArraySet(Version.LUCENE_45, new ArrayList(), true));
+      usgsAnalyzer = new StandardAnalyzer(Version.LUCENE_48, new CharArraySet(Version.LUCENE_48, new ArrayList(), true));
     }
     if (geonamesIndex == null) {
       String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.geonames", "");
       if (indexloc.equals("")) {
-        System.out.println("Geonames Gaz location not found");
+        LOGGER.error(new Exception("Geonames Gaz location not found"));
 
       }
       String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", String.valueOf(scoreCutoff));
@@ -285,7 +299,7 @@ public class GazetteerSearcher {
       geonamesReader = DirectoryReader.open(geonamesIndex);
       geonamesSearcher = new IndexSearcher(geonamesReader);
       //TODO: a language code switch statement should be employed here at some point
-      geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_45, new CharArraySet(Version.LUCENE_45, new ArrayList(), true));
+      geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_48, new CharArraySet(Version.LUCENE_48, new ArrayList(), true));
 
     }
   }

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java Mon May 12 19:30:21 2014
@@ -39,7 +39,6 @@ public class GeoEntityLinker implements 
   private GazetteerSearcher gazateerSearcher;
   private List<LinkedEntityScorer> scorers = new ArrayList<>();
 
-
   @Override
   public List<LinkedSpan> find(String doctext, Span[] sentences, String[][] tokensBySentence, Span[][] namesBySentence) {
     ArrayList<LinkedSpan> spans = new ArrayList<LinkedSpan>();
@@ -64,7 +63,7 @@ public class GeoEntityLinker implements 
         ArrayList<BaseLink> geoNamesEntries = new ArrayList<>();
         if (!(countryMentions.keySet().contains("us") && countryMentions.keySet().size() == 1)
                 || countryMentions.keySet().size() > 1 || countryMentions.keySet().isEmpty()) {
-        
+
           if (!countryMentions.keySet().isEmpty()) {
             for (String code : countryMentions.keySet()) {
               if (!code.equals("us")) {
@@ -82,7 +81,8 @@ public class GeoEntityLinker implements 
           //usgsEntries = usgsGaz.find(matches[i], names[i], linkerProperties);
           usgsEntries.addAll(gazateerSearcher.usgsFind(matches[i], 3));
         }
-        LinkedSpan<BaseLink> geoSpan = new LinkedSpan<>(geoNamesEntries, names[i].getStart(), names[i].getEnd());
+        LinkedSpan<BaseLink> geoSpan = new LinkedSpan<>(geoNamesEntries, names[i].getStart(), names[i].getEnd(), "location",names[i].getProb());
+    
 
         if (!usgsEntries.isEmpty()) {
           geoSpan.getLinkedEntries().addAll(usgsEntries);

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/ModelBasedScorer.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/ModelBasedScorer.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/ModelBasedScorer.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/ModelBasedScorer.java Mon May 12 19:30:21 2014
@@ -27,6 +27,7 @@ import opennlp.tools.entitylinker.Entity
 import opennlp.tools.entitylinker.BaseLink;
 import opennlp.tools.entitylinker.LinkedSpan;
 import opennlp.tools.util.Span;
+import org.apache.log4j.Logger;
 
 /**
  *
@@ -34,6 +35,7 @@ import opennlp.tools.util.Span;
  */
 public class ModelBasedScorer implements LinkedEntityScorer<CountryContext> {
 
+  private static final Logger LOGGER = Logger.getLogger(ModelBasedScorer.class);
   DocumentCategorizerME documentCategorizerME;
   DoccatModel doccatModel;
   public static final int RADIUS = 200;
@@ -45,12 +47,9 @@ public class ModelBasedScorer implements
       if (doccatModel == null) {
         String path = properties.getProperty("opennlp.geoentitylinker.modelbasedscorer.modelpath", "");
         if (path.equals("")) {
-          if (!modelexists) {
-            System.err.println(this.getClass().getSimpleName() + ": could not find property \"opennlp.geoentitylinker.modelbasedscorer.modelpath\" : no ModelBasedScoring will be performed");
-          }
-          modelexists = true;
           return;
         }
+        modelexists = true;
         doccatModel = new DoccatModel(new File(path));
         documentCategorizerME = new DocumentCategorizerME(doccatModel);
       }
@@ -67,11 +66,11 @@ public class ModelBasedScorer implements
       }
 
     } catch (FileNotFoundException ex) {
-      System.err.println(this.getClass().getSimpleName() + ": could not find modelpath using EntityLinkerProperties. Property should be \"opennlp.geoentitylinker.modelbasedscorer.modelpath\"");
+      LOGGER.error(ex);
     } catch (IOException ex) {
-      System.err.println(ex);
+      LOGGER.error(ex);
     } catch (Exception ex) {
-      System.err.println(ex);
+      LOGGER.error(ex);
     }
   }
 
@@ -80,11 +79,11 @@ public class ModelBasedScorer implements
    * radius of a mention within the doctext
    *
    * @param linkedSpans
+   * @param sentenceSpans
    * @param docText
-   * @param additionalContext
    * @param radius
    * @return a map of the index of the linked span to the string of surrounding
-   *         text: Map<indexofspan,surrounding text>
+   * text: Map<indexofspan,surrounding text>
    */
   public Map<Integer, String> generateProximalFeatures(List<LinkedSpan> linkedSpans, Span[] sentenceSpans, String docText, int radius) {
     Map<Integer, String> featureBags = new HashMap<>();
@@ -115,7 +114,6 @@ public class ModelBasedScorer implements
       featureBags.put(entry.getKey(), getTextChunk(entry.getValue(), docText, radius));
     }
 
-
     return featureBags;
   }
 

Modified: opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/PointClustering.java
URL: http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/PointClustering.java?rev=1594067&r1=1594066&r2=1594067&view=diff
==============================================================================
--- opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/PointClustering.java (original)
+++ opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/PointClustering.java Mon May 12 19:30:21 2014
@@ -15,6 +15,9 @@
  */
 package opennlp.addons.geoentitylinker;
 
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.io.GeohashUtils;
+import com.spatial4j.core.shape.Point;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -42,8 +45,7 @@ public class PointClustering {
       Double latw = entry.getLatitude();
       Double lonw = entry.getLongitude();
 
-
-      String key = simpleGeohash(latw, lonw).substring(0, precision);
+      String key = geoHash(latw, lonw).substring(0, precision);
       if (map.containsKey(key)) {
         map.get(key).add(entry);
       } else {
@@ -72,7 +74,44 @@ public class PointClustering {
       }
     }
 
+  }
+
+  /**
+   * Returns a geohash based on Lucene Spatial (spatial4j GeohashUtils)
+   *
+   * @param lat the input latitude Y
+   * @param lon the input longitude X
+   * @return the geohash String returned by GeohashUtils.encodeLatLon
+   */
+  public String geoHash(Double lat, Double lon) {
+    String encodeLatLon = GeohashUtils.encodeLatLon(lat, lon);
+    return encodeLatLon;
+  }
+
+  /**
+   * Returns the X and Y point for the geohash. Element 0 is the X (longitude),
+   * element 1 is the Y (latitude)
+   *
+   * @param geohash the geohash String to decode
+   * @return a two-element array holding the decoded point as {X, Y}
+   */
+  public double[] geoHashToPoint(String geohash) {
+    Point decode = GeohashUtils.decode(geohash, SpatialContext.GEO);
+    double[] coords = new double[]{decode.getX(), decode.getY()};
+    return coords;
+  }
 
+  /**
+   * Returns the point for the geohash as a String of the form "X,Y": the X
+   * (longitude) comes first, then a comma, then the Y (latitude)
+   *
+   * @param geohash the geohash String to decode
+   * @return the decoded point formatted as "X,Y"
+   */
+  public String geoHashToPointStr(String geohash) {
+    Point decode = GeohashUtils.decode(geohash, SpatialContext.GEO);
+    String point = decode.getX() + "," + decode.getY();
+    return point;
+  }
 
   /**