You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/03/02 06:42:12 UTC
[03/20] tika git commit: Update NLTKNERecogniser.java
Update NLTKNERecogniser.java
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/db2b4757
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/db2b4757
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/db2b4757
Branch: refs/heads/master
Commit: db2b475733dffca63143551a5f1ddd89d97f0960
Parents: 2b99eea
Author: Manali Shah <ma...@usc.edu>
Authored: Tue Feb 2 00:33:17 2016 -0800
Committer: Manali Shah <ma...@usc.edu>
Committed: Tue Feb 2 00:33:17 2016 -0800
----------------------------------------------------------------------
.../tika/parser/ner/nltk/NLTKNERecogniser.java | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/db2b4757/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index eb216ea..cb152f3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -44,11 +44,9 @@ import org.apache.http.message.BasicNameValuePair;
/**
* This class offers an implementation of {@link NERecogniser} based on
- * CRF classifiers from Stanford CoreNLP. This NER requires additional setup,
- * due to runtime binding to Stanford CoreNLP.
+ * the ne_chunk() function of NLTK. This NER requires additional setup,
+ * because it sends HTTP requests to an endpoint server that runs NLTK.
* See <a href="http://wiki.apache.org/tika/TikaAndNER#NLTK">
- * Tika NER Wiki</a> for configuring this recogniser.
- * @see NERecogniser
*
*/
public class NLTKNERecogniser implements NERecogniser {
@@ -56,6 +54,10 @@ public class NLTKNERecogniser implements NERecogniser {
private static final Logger LOG = LoggerFactory.getLogger(NLTKNERecogniser.class);
private final static String USER_AGENT = "Mozilla/5.0";
private static boolean available = false;
+
+ /**
+ * some common entities identified by NLTK
+ */
public static final Set<String> ENTITY_TYPES = new HashSet<String>(){{
add(PERSON);
add(TIME);
@@ -70,7 +72,6 @@ public class NLTKNERecogniser implements NERecogniser {
public NLTKNERecogniser(){
try {
-
String url = "http://localhost:5000/";
HttpClient client = HttpClientBuilder.create().build();
HttpGet get = new HttpGet(url);
@@ -93,9 +94,8 @@ public class NLTKNERecogniser implements NERecogniser {
/**
- *
- * @return {@code true} if model was available, valid and was able to initialise the classifier.
- * returns {@code false} when this recogniser is not available for service.
+ * @return {@code true} if server endpoint is available.
+ * returns {@code false} if server endpoint is not available for service.
*/
public boolean isAvailable() {
return available;
@@ -120,7 +120,6 @@ public class NLTKNERecogniser implements NERecogniser {
String url = "http://localhost:5000/nltk";
HttpClient client = HttpClientBuilder.create().build();
HttpPost post = new HttpPost(url);
- // add header
post.setHeader("User-Agent", USER_AGENT);
List<NameValuePair> urlParameters = new ArrayList<NameValuePair>();
urlParameters.add(new BasicNameValuePair("text", text));
@@ -153,7 +152,6 @@ public class NLTKNERecogniser implements NERecogniser {
}
ENTITY_TYPES.clear();
ENTITY_TYPES.addAll(entities.keySet());
- LOG.info("returning this:" + entities.keySet().toString());
return entities;
}