You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/10/17 09:06:16 UTC
svn commit: r1532986 - in
/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight:
annotate/ candidates/ model/ utils/
Author: rwesten
Date: Thu Oct 17 07:06:16 2013
New Revision: 1532986
URL: http://svn.apache.org/r1532986
Log:
Fix for STANBOL-1069; also some improvements to the Annotate Service made while looking into STANBOL-848 (e.g. added equals and hashCode for the Annotation class)
Modified:
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java Thu Oct 17 07:06:16 2013
@@ -39,6 +39,7 @@ import java.net.URLEncoder;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
+import java.util.HashMap;
import java.util.Map;
import org.apache.clerezza.rdf.core.Language;
@@ -53,6 +54,7 @@ import org.apache.felix.scr.annotations.
import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -193,12 +195,13 @@ public class DBPSpotlightAnnotateEnhance
String text = SpotlightEngineUtils.getPlainContent(ci);
Collection<Annotation> dbpslGraph = doPostRequest(text,ci.getUri());
+ Map<SurfaceForm,UriRef> surfaceForm2TextAnnotation = new HashMap<SurfaceForm,UriRef>();
if (dbpslGraph != null) {
// Acquire a write lock on the ContentItem when adding the
// enhancements
ci.getLock().writeLock().lock();
try {
- createEnhancements(dbpslGraph, ci, text, language);
+ createEnhancements(dbpslGraph, ci, text, language, surfaceForm2TextAnnotation);
if (log.isDebugEnabled()) {
Serializer serializer = Serializer.getInstance();
ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
@@ -231,22 +234,18 @@ public class DBPSpotlightAnnotateEnhance
* the content item
*/
protected void createEnhancements(Collection<Annotation> occs,
- ContentItem ci, String text, Language language) {
- //we need to create multiple EntityAnnotations even for the same
- //suggested Entity, as the scores will be different
- //HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
+ ContentItem ci, String text, Language language,
+ Map<SurfaceForm,UriRef> surfaceForm2TextAnnotation) {
for (Annotation occ : occs) {
- UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
- occ.surfaceForm, this, ci, text, language);
-
-// if (entityAnnotationMap.containsKey(occ.uri)) {
-// model.add(new TripleImpl(entityAnnotationMap.get(occ.uri),
-// DC_RELATION, textAnnotation));
-// } else {
- SpotlightEngineUtils.createEntityAnnotation(occ, this, ci, textAnnotation, language);
-// entityAnnotationMap.put(occ.uri, entityAnnotation);
+ UriRef textAnnotation = surfaceForm2TextAnnotation.get(occ.surfaceForm);
+ if(textAnnotation == null){ //not yet written ... create a new
+ textAnnotation = SpotlightEngineUtils.createTextEnhancement(
+ occ.surfaceForm, this, ci, text, language);
+ surfaceForm2TextAnnotation.put(occ.surfaceForm,textAnnotation);
}
+ SpotlightEngineUtils.createEntityAnnotation(occ, this, ci, textAnnotation, language);
}
+ }
/**
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java Thu Oct 17 07:06:16 2013
@@ -247,7 +247,7 @@ public class DBPSpotlightCandidatesEnhan
CandidateResource resource = resources.next();
UriRef entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(
resource, this, ci, textAnnotation);
- entityAnnotationMap.put(resource.uri, entityAnnotation);
+ entityAnnotationMap.put(resource.localName, entityAnnotation);
}
if (entityAnnotationMap.containsKey(occ.name)) {
model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java Thu Oct 17 07:06:16 2013
@@ -144,10 +144,9 @@ public class Annotation {
dbpslann.support = (new Integer(node.getAttribute("support")))
.intValue();
dbpslann.types = node.getAttribute("types");
- dbpslann.surfaceForm = new SurfaceForm();
- dbpslann.surfaceForm.name = node.getAttribute("surfaceForm");
- dbpslann.surfaceForm.offset = (new Integer(node.getAttribute("offset")))
- .intValue();
+ dbpslann.surfaceForm = new SurfaceForm(
+ new Integer(node.getAttribute("offset")),
+ node.getAttribute("surfaceForm"));
//set the type of the surface form
List<String> dbpediaTypes = dbpslann.getDbpediaTypeNames();
if(!dbpediaTypes.isEmpty()){
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java Thu Oct 17 07:06:16 2013
@@ -21,6 +21,7 @@ import static org.apache.stanbol.enhance
import java.util.Collection;
import java.util.HashSet;
+import org.apache.clerezza.rdf.core.UriRef;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -36,7 +37,7 @@ import org.w3c.dom.NodeList;
public class CandidateResource {
public String label;
- public String uri;
+ public String localName;
public double contextualScore;
public double percentageOfSecondRank;
public double support;
@@ -47,10 +48,15 @@ public class CandidateResource {
return String
.format("[label=%s, uri=%s, contextualScore=%d, percentageOfSecondRank=%d, contextualScore=%d, "
+ "percentageOfSecondRank=%d, contextualScore=%d]",
- label, uri, contextualScore, percentageOfSecondRank,
+ label, localName, contextualScore, percentageOfSecondRank,
support, priorScore, finalScore);
}
+ public UriRef getUri(){
+ return new UriRef(new StringBuilder("http://dbpedia.org/resource/")
+ .append(localName).toString());
+ }
+
/**
* This method creates the Collection of surface forms, which the method
* <code>createEnhancement</code> adds to the meta data of the content item
@@ -77,7 +83,7 @@ public class CandidateResource {
Element r = (Element) n;
CandidateResource resource = new CandidateResource();
resource.label = r.getAttribute("label");
- resource.uri = r.getAttribute("uri");
+ resource.localName = r.getAttribute("uri");
resource.contextualScore = (new Double(
r.getAttribute("contextualScore"))).doubleValue();
resource.percentageOfSecondRank = (new Double(
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java Thu Oct 17 07:06:16 2013
@@ -37,11 +37,34 @@ import org.w3c.dom.NodeList;
*/
public class SurfaceForm {
- public String name;
+ public final String name;
public String type;
- public Integer offset;
- public List<CandidateResource> resources = new ArrayList<CandidateResource>();
+ public final Integer offset;
+ public final List<CandidateResource> resources = new ArrayList<CandidateResource>();
+ public SurfaceForm(Integer offset, String name){
+ if(name == null || offset == null){
+ throw new IllegalArgumentException("Offset and name MUST NOT be NULL");
+ }
+ this.name = name;
+ this.offset = offset;
+ }
+
+ @Override
+ public int hashCode() {
+ return name.hashCode()+offset.hashCode();
+ }
+ @Override
+ public boolean equals(Object o) {
+ if(o instanceof SurfaceForm && name.equals(((SurfaceForm)o).name) &&
+ offset.equals(((SurfaceForm)o).offset)){
+ return (type == null && ((SurfaceForm)o).type == null) ||
+ (type != null && type.equals(((SurfaceForm)o).type));
+ } else {
+ return false;
+ }
+ }
+
public String toString() {
return String.format("[name=%s, offset=%i, type=%s]", name, offset,
type);
@@ -69,10 +92,9 @@ public class SurfaceForm {
}
protected static SurfaceForm parseSerfaceForm(Element node) {
- SurfaceForm dbpslann = new SurfaceForm();
- dbpslann.name = node.getAttribute("name");
- dbpslann.offset = (new Integer(node.getAttribute("offset")))
- .intValue();
+ SurfaceForm dbpslann = new SurfaceForm(
+ new Integer(node.getAttribute("offset")),
+ node.getAttribute("name"));
dbpslann.type = node.getAttribute("type");
return dbpslann;
}
Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java Thu Oct 17 07:06:16 2013
@@ -62,6 +62,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.osgi.service.cm.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -243,7 +244,7 @@ public class SpotlightEngineUtils {
model.add(new TripleImpl(entityAnnotation,
ENHANCER_ENTITY_LABEL, label));
model.add(new TripleImpl(entityAnnotation,
- ENHANCER_ENTITY_REFERENCE, new UriRef(resource.uri)));
+ ENHANCER_ENTITY_REFERENCE, resource.getUri()));
model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE,
literalFactory.createTypedLiteral(resource.contextualScore)));
model.add(new TripleImpl(entityAnnotation,PROPERTY_PERCENTAGE_OF_SECOND_RANK,