You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2015/06/12 12:09:12 UTC

svn commit: r1685056 - /stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java

Author: rwesten
Date: Fri Jun 12 10:09:12 2015
New Revision: 1685056

URL: http://svn.apache.org/r1685056
Log:
STANBOL-1423: With OpenNLP 1.6.0, Span now reports the confidence, so we use this feature for the NER engine. Also added code that ignores confidence values < 0.5, as it seems that those are currently not working for perceptron models

Modified:
    stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java

Modified: stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1685056&r1=1685055&r2=1685056&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java (original)
+++ stanbol/trunk/enhancement-engines/opennlp/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java Fri Jun 12 10:09:12 2015
@@ -267,8 +267,10 @@ public abstract class NEREngineCore
                     if(occurrence.type != null){
                         g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                     }
-                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory
-                            .createTypedLiteral(occurrence.confidence)));
+                    if(occurrence.confidence != null){
+                        g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory
+                                .createTypedLiteral(occurrence.confidence)));
+                    }
                     if (occurrence.start != null && occurrence.end != null) {
                         g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory
                                 .createTypedLiteral(occurrence.start)));
@@ -556,9 +558,21 @@ public abstract class NEREngineCore
             for (int j = 0; j < nameSpans.length; j++) {
                 String name = sentence.substring(tokenSpans[nameSpans[j].getStart()].getStart(), 
                     tokenSpans[nameSpans[j].getEnd()-1].getEnd());
-                Double confidence = 1.0;
-                for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) {
-                    confidence *= probs[k];
+                //NOTE: With OpenNLP 1.6 the probability is now stored in the span
+                double prob = nameSpans[j].getProb();
+                //prob == 0.0 := unspecified
+                Double confidence = prob != 0.0 ? Double.valueOf(prob) : null;
+                if(confidence == null){ //fall back to the old if it is not set.
+                    for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) {
+                        prob *= probs[k];
+                    }
+                    confidence = Double.valueOf(prob);
+                } else if(confidence < 0.5d){
+                    //It looks like as if preceptron based models do return
+                    //invalid probabilities. As it is expected the Named Entities
+                    //with a probability < 50% are not even returned by finder.find(..)
+                    //we will just ignore confidence values < 0.5 here
+                    confidence = null;
                 }
                 int start = tokenSpans[nameSpans[j].getStart()].getStart();
                 int absoluteStart = sentenceSpans[i].getStart() + start;