You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/11/15 21:01:22 UTC

svn commit: r1714495 - in /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic: GeoParserConfig.java NameEntityExtractor.java

Author: nick
Date: Sun Nov 15 20:01:22 2015
New Revision: 1714495

URL: http://svn.apache.org/viewvc?rev=1714495&view=rev
Log:
Fix inconsistent whitespace

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java?rev=1714495&r1=1714494&r2=1714495&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java Sun Nov 15 20:01:22 2015
@@ -23,34 +23,31 @@ import java.net.MalformedURLException;
 import java.net.URL;
 
 public class GeoParserConfig implements Serializable {
+    private static final long serialVersionUID = -3167692634278575818L;
+    private URL nerModelUrl = null;
 
-	private static final long serialVersionUID = 2L;
-	private URL nerModelUrl = null;
-
-	public GeoParserConfig() {
-		this.nerModelUrl = GeoParserConfig.class.getResource("en-ner-location.bin");
-	}
-
-	public void setNERModelPath(String path) {
-		if (path == null)
-			return;
-		File file = new File(path);
-		if (file.isDirectory() || !file.exists()) {
-			return;
-		}
-		try {
-			this.nerModelUrl = file.toURI().toURL();
-		} catch (MalformedURLException e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	public void setNerModelUrl(URL url) {
-		this.nerModelUrl = url;
-	}
-
-	public URL getNerModelUrl() {
-		return nerModelUrl;
-	}
-
+    public GeoParserConfig() {
+        this.nerModelUrl = GeoParserConfig.class.getResource("en-ner-location.bin");
+    }
+
+    public void setNERModelPath(String path) {
+        if (path == null)
+            return;
+        File file = new File(path);
+        if (file.isDirectory() || !file.exists()) {
+            return;
+        }
+        try {
+            this.nerModelUrl = file.toURI().toURL();
+        } catch (MalformedURLException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public void setNerModelUrl(URL url) {
+        this.nerModelUrl = url;
+    }
+    public URL getNerModelUrl() {
+        return nerModelUrl;
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java?rev=1714495&r1=1714494&r2=1714495&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java Sun Nov 15 20:01:22 2015
@@ -37,93 +37,88 @@ import org.apache.commons.io.IOUtils;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 public class NameEntityExtractor {
-
-	ArrayList<String> locationNameEntities;
-	String bestNameEntity;
-	private HashMap<String, Integer> tf;
-	private final NameFinderME nameFinder;
-
-	public NameEntityExtractor(URL modelUrl) throws IOException {
-		this.locationNameEntities = new ArrayList<String>();
-		this.bestNameEntity = null;
-		TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
-		this.nameFinder = new NameFinderME(model);
-		this.tf = new HashMap<String, Integer>();
-	}
-
-	/*
-	 * Use OpenNLP to extract location names that's appearing in the steam.
-	 * OpenNLP's default Name Finder accuracy is not very good, please refer to
-	 * its documentation.
-	 * 
-	 * @param stream stream that passed from this.parse()
-	 */
-
-	public void getAllNameEntitiesfromInput(InputStream stream)
-			throws IOException {
-
-
-		String[] in = IOUtils.toString(stream, UTF_8).split(" ");
-		Span nameE[];
-		//name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind
-		synchronized (nameFinder) {
-			nameE = nameFinder.find(in);
-			//the same name finder is reused, so clear adaptive data
-			nameFinder.clearAdaptiveData();
-		}
-
-		String spanNames = Arrays.toString(Span.spansToStrings(nameE, in));
-		spanNames = spanNames.substring(1, spanNames.length() - 1);
-		String[] tmp = spanNames.split(",");
-
-		for (String name : tmp) {
-			name = name.trim();
-			this.locationNameEntities.add(name);
-		}
-
-
-	}
-
-	/*
-	 * Get the best location entity extracted from the input stream. Simply
-	 * return the most frequent entity, If there several highest frequent
-	 * entity, pick one randomly. May not be the optimal solution, but works.
-	 * 
-	 * @param locationNameEntities OpenNLP name finder's results, stored in
-	 * ArrayList
-	 */
-	public void getBestNameEntity() {
-		if (this.locationNameEntities.size() == 0)
-			return;
-
-		for (int i = 0; i < this.locationNameEntities.size(); ++i) {
-			if (tf.containsKey(this.locationNameEntities.get(i)))
-				tf.put(this.locationNameEntities.get(i),
-						tf.get(this.locationNameEntities.get(i)) + 1);
-			else
-				tf.put(this.locationNameEntities.get(i), 1);
-		}
-		int max = 0;
-		List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(
-				tf.entrySet());
-		Collections.shuffle(list);
-		Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
-			public int compare(Map.Entry<String, Integer> o1,
-					Map.Entry<String, Integer> o2) {
-				return o2.getValue().compareTo(o1.getValue()); // descending
-				// order
-
-			}
-		});
-
-		this.locationNameEntities.clear();// update so that they are in
-											// descending order
-		for (Map.Entry<String, Integer> entry : list) {
-			this.locationNameEntities.add(entry.getKey());
-			if (entry.getValue() > max) {
-				max = entry.getValue();
-				this.bestNameEntity = entry.getKey();
-			}
-		}
-	}
+    ArrayList<String> locationNameEntities;
+    String bestNameEntity;
+    private HashMap<String, Integer> tf;
+    private final NameFinderME nameFinder;
+
+    public NameEntityExtractor(URL modelUrl) throws IOException {
+        this.locationNameEntities = new ArrayList<String>();
+        this.bestNameEntity = null;
+        TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
+        this.nameFinder = new NameFinderME(model);
+        this.tf = new HashMap<String, Integer>();
+    }
+
+    /*
+     * Use OpenNLP to extract location names that's appearing in the steam.
+     * OpenNLP's default Name Finder accuracy is not very good, please refer to
+     * its documentation.
+     * 
+     * @param stream stream that passed from this.parse()
+     */
+    public void getAllNameEntitiesfromInput(InputStream stream) throws IOException {
+        String[] in = IOUtils.toString(stream, UTF_8).split(" ");
+        Span nameE[];
+        
+        //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind
+        synchronized (nameFinder) {
+            nameE = nameFinder.find(in);
+            //the same name finder is reused, so clear adaptive data
+            nameFinder.clearAdaptiveData();
+        }
+
+        String spanNames = Arrays.toString(Span.spansToStrings(nameE, in));
+        spanNames = spanNames.substring(1, spanNames.length() - 1);
+        String[] tmp = spanNames.split(",");
+
+        for (String name : tmp) {
+            name = name.trim();
+            this.locationNameEntities.add(name);
+        }
+
+
+    }
+
+    /*
+     * Get the best location entity extracted from the input stream. Simply
+     * return the most frequent entity, If there several highest frequent
+     * entity, pick one randomly. May not be the optimal solution, but works.
+     * 
+     * @param locationNameEntities OpenNLP name finder's results, stored in
+     * ArrayList
+     */
+    public void getBestNameEntity() {
+        if (this.locationNameEntities.size() == 0)
+            return;
+
+        for (int i = 0; i < this.locationNameEntities.size(); ++i) {
+            if (tf.containsKey(this.locationNameEntities.get(i)))
+                tf.put(this.locationNameEntities.get(i),
+                        tf.get(this.locationNameEntities.get(i)) + 1);
+            else
+                tf.put(this.locationNameEntities.get(i), 1);
+        }
+        int max = 0;
+        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(
+                tf.entrySet());
+        Collections.shuffle(list);
+        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
+            public int compare(Map.Entry<String, Integer> o1,
+                    Map.Entry<String, Integer> o2) {
+                // Descending Order
+                return o2.getValue().compareTo(o1.getValue());
+            }
+        });
+
+        this.locationNameEntities.clear();// update so that they are in
+                                          // descending order
+        for (Map.Entry<String, Integer> entry : list) {
+            this.locationNameEntities.add(entry.getKey());
+            if (entry.getValue() > max) {
+                max = entry.getValue();
+                this.bestNameEntity = entry.getKey();
+            }
+        }
+    }
 }