You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/01/18 19:39:06 UTC
svn commit: r1725318 - in /tika/trunk: CHANGES.txt
tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
Author: mattmann
Date: Mon Jan 18 18:39:06 2016
New Revision: 1725318
URL: http://svn.apache.org/viewvc?rev=1725318&view=rev
Log:
Fix for TIKA-1834: GeoTopic parser holding state while running Tika server. Contributed by smadha <ms...@usc.edu>. This closes #71.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Jan 18 18:39:06 2016
@@ -1,5 +1,8 @@
Release 1.12 - Current Development
+ * Fix bug in GeoTopicParser where NER is reused instead of instantiated
+ with each request (TIKA-1834).
+
* Upgrade rome to 1.5.1 && Downgrade Rome dependency to 0.9 to avoid
nasty NPE (TIKA-1820, TIKA-1516)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java Mon Jan 18 18:39:06 2016
@@ -37,6 +37,9 @@ import org.apache.tika.parser.geo.topic.
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+
public class GeoParser extends AbstractParser {
private static final long serialVersionUID = -2241391757440215491L;
private static final Logger LOG = Logger.getLogger(GeoParser.class.getName());
@@ -50,7 +53,7 @@ public class GeoParser extends AbstractP
private boolean initialized;
private URL modelUrl;
- private NameEntityExtractor extractor;
+ private NameFinderME nameFinder;
private boolean available;
@Override
@@ -76,12 +79,14 @@ public class GeoParser extends AbstractP
this.available = modelUrl != null && gazetteerClient.checkAvail();
if (this.available) {
- try {
- this.extractor = new NameEntityExtractor(modelUrl);
+ try {
+ TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
+ this.nameFinder = new NameFinderME(model);
} catch (Exception e) {
LOG.warning("Named Entity Extractor setup failed: " + e);
this.available = false;
}
+
}
initialized = true;
}
@@ -98,6 +103,14 @@ public class GeoParser extends AbstractP
if (!isAvailable()) {
return;
}
+ NameEntityExtractor extractor = null;
+
+ try {
+ extractor = new NameEntityExtractor(nameFinder);
+ } catch (Exception e) {
+ LOG.warning("Named Entity Extractor setup failed: " + e);
+ return;
+ }
/*----------------get locationNameEntities and best nameEntity for the input stream---------------------*/
extractor.getAllNameEntitiesfromInput(stream);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java Mon Jan 18 18:39:06 2016
@@ -18,9 +18,10 @@
package org.apache.tika.parser.geo.topic;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import java.io.IOException;
import java.io.InputStream;
-import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -29,12 +30,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.util.Span;
import org.apache.commons.io.IOUtils;
-import static java.nio.charset.StandardCharsets.UTF_8;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.util.Span;
public class NameEntityExtractor {
ArrayList<String> locationNameEntities;
@@ -42,11 +41,10 @@ public class NameEntityExtractor {
private HashMap<String, Integer> tf;
private final NameFinderME nameFinder;
- public NameEntityExtractor(URL modelUrl) throws IOException {
+ public NameEntityExtractor(NameFinderME nameFinder) throws IOException {
this.locationNameEntities = new ArrayList<String>();
this.bestNameEntity = null;
- TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
- this.nameFinder = new NameFinderME(model);
+ this.nameFinder = nameFinder;
this.tf = new HashMap<String, Integer>();
}