You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/01/18 19:39:06 UTC

svn commit: r1725318 - in /tika/trunk: CHANGES.txt tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java

Author: mattmann
Date: Mon Jan 18 18:39:06 2016
New Revision: 1725318

URL: http://svn.apache.org/viewvc?rev=1725318&view=rev
Log:
Fix for TIKA-1834: Fix for GeoTopic parser holding state while running Tika server. Contributed by smadha <ms...@usc.edu>. This closes #71.

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Jan 18 18:39:06 2016
@@ -1,5 +1,8 @@
 Release 1.12 - Current Development
 
+  * Fix bug in GeoTopicParser where NER is reused instead of instantiated
+    with each request (TIKA-1834).
+
   * Upgrade rome to 1.5.1 && Downgrade Rome dependency to 0.9 to avoid 
     nasty NPE (TIKA-1820, TIKA-1516)
 

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java Mon Jan 18 18:39:06 2016
@@ -37,6 +37,9 @@ import org.apache.tika.parser.geo.topic.
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+
 public class GeoParser extends AbstractParser {
     private static final long serialVersionUID = -2241391757440215491L;
     private static final Logger LOG = Logger.getLogger(GeoParser.class.getName());
@@ -50,7 +53,7 @@ public class GeoParser extends AbstractP
     
     private boolean initialized;
     private URL modelUrl;
-    private NameEntityExtractor extractor;
+    private NameFinderME nameFinder;
     private boolean available;
 
     @Override
@@ -76,12 +79,14 @@ public class GeoParser extends AbstractP
         this.available = modelUrl != null && gazetteerClient.checkAvail();
         
         if (this.available) {
-            try {
-                this.extractor = new NameEntityExtractor(modelUrl);
+        	try {
+        		TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
+                this.nameFinder = new NameFinderME(model);
             } catch (Exception e) {
                 LOG.warning("Named Entity Extractor setup failed: " + e);
                 this.available = false;
             }
+        	
         }
         initialized = true;
     }
@@ -98,6 +103,14 @@ public class GeoParser extends AbstractP
         if (!isAvailable()) {
             return;
         }
+        NameEntityExtractor extractor = null;
+        
+        try {
+            extractor = new NameEntityExtractor(nameFinder);
+        } catch (Exception e) {
+            LOG.warning("Named Entity Extractor setup failed: " + e);
+            return;
+        }
 
         /*----------------get locationNameEntities and best nameEntity for the input stream---------------------*/
         extractor.getAllNameEntitiesfromInput(stream);

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java?rev=1725318&r1=1725317&r2=1725318&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java Mon Jan 18 18:39:06 2016
@@ -18,9 +18,10 @@
 package org.apache.tika.parser.geo.topic;
 
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -29,12 +30,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.util.Span;
 import org.apache.commons.io.IOUtils;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.util.Span;
 
 public class NameEntityExtractor {
     ArrayList<String> locationNameEntities;
@@ -42,11 +41,10 @@ public class NameEntityExtractor {
     private HashMap<String, Integer> tf;
     private final NameFinderME nameFinder;
 
-    public NameEntityExtractor(URL modelUrl) throws IOException {
+    public NameEntityExtractor(NameFinderME nameFinder) throws IOException {
         this.locationNameEntities = new ArrayList<String>();
         this.bestNameEntity = null;
-        TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
-        this.nameFinder = new NameFinderME(model);
+        this.nameFinder = nameFinder;
         this.tf = new HashMap<String, Integer>();
     }