You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2013/11/14 13:16:46 UTC
svn commit: r1541887 - in /opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker: GazateerIndexer.java GazateerSearcher.java GeoEntityLinkerSetupUtils.java
Author: markg
Date: Thu Nov 14 12:16:46 2013
New Revision: 1541887
URL: http://svn.apache.org/r1541887
Log:
OPENNLP-579
Fixed a bug in the GazateerIndexer. Refined the SetupUtils.
Modified:
opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java Thu Nov 14 12:16:46 2013
@@ -38,12 +38,22 @@ import org.apache.lucene.util.Version;
*/
public class GazateerIndexer {
- public enum GazType {
+ public static interface Separable {
+
+ String getSeparator();
+ }
+
+ public enum GazType implements Separable {
GEONAMES {
@Override
public String toString() {
- return "/opennlp_geoentitylinker_usgsgaz_idx";
+ return "/opennlp_geoentitylinker_geonames_idx";
+ }
+
+ @Override
+ public String getSeparator() {
+ return "\t";
}
},
USGS {
@@ -51,6 +61,11 @@ public class GazateerIndexer {
public String toString() {
return "/opennlp_geoentitylinker_usgsgaz_idx";
}
+
+ @Override
+ public String getSeparator() {
+ return "\\|";
+ }
}
}
@@ -67,24 +82,24 @@ public class GazateerIndexer {
IndexWriter w = new IndexWriter(index, config);
- readFile(gazateerInputData, w);
+ readFile(gazateerInputData, w, type);
w.commit();
w.close();
}
- public void readFile(File gazateerInputData, IndexWriter w) throws Exception {
+ public void readFile(File gazateerInputData, IndexWriter w, GazType type) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData));
List<String> fields = new ArrayList<String>();
int counter = 0;
System.out.println("reading gazateer data from file...........");
while (reader.read() != -1) {
String line = reader.readLine();
- String[] values = line.split("\\|");//nga format
+ String[] values = line.split(type.getSeparator());
if (counter == 0) {
// build fields
for (String columnName : values) {
- fields.add(columnName.replace("»¿", ""));
+ fields.add(columnName.replace("»¿", "").trim());
}
@@ -102,6 +117,7 @@ public class GazateerIndexer {
}
}
-
+ w.commit();
+ System.out.println("Completed indexing gaz! index name is: " + type.toString());
}
}
Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java Thu Nov 14 12:16:46 2013
@@ -70,9 +70,10 @@ public class GazateerSearcher {
geonamesReader = DirectoryReader.open(geonamesIndex);
geonamesSearcher = new IndexSearcher(geonamesReader);
geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
+
}
- String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^100";
+ String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^10000";
QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);
Query q = parser.parse(luceneQueryString);
Modified: opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java?rev=1541887&r1=1541886&r2=1541887&view=diff
==============================================================================
--- opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java (original)
+++ opennlp/sandbox/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java Thu Nov 14 12:16:46 2013
@@ -83,7 +83,7 @@ public class GeoEntityLinkerSetupUtils {
}
}
}
- System.out.println("Document processing complete. Writing traininf data to file");
+ System.out.println("Document processing complete. Writing training data to "+ annotationOutFile.getAbsolutePath());
writer.close();
System.out.println("Building Doccat model...");
DoccatModel model = null;
@@ -116,7 +116,7 @@ public class GeoEntityLinkerSetupUtils {
* @param radius
* @return
*/
- public static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
+ private static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
Map<String, ArrayList< String>> featureBags = new HashMap<>();
Map<String, Set<Integer>> countryMentions = additionalContext.getCountryMentions();
/**